Mercurial > hg > CbC > CbC_llvm
comparison clang/lib/Headers/__clang_cuda_builtin_vars.h @ 150:1d019706d866
LLVM10
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 15:10:13 +0900 |
parents | |
children | 2e18cbf3894f |
comparison
equal
deleted
inserted
replaced
147:c2174574ed3a | 150:1d019706d866 |
---|---|
1 /*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------=== | |
2 * | |
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |
4 * See https://llvm.org/LICENSE.txt for license information. | |
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |
6 * | |
7 *===-----------------------------------------------------------------------=== | |
8 */ | |
9 | |
10 #ifndef __CUDA_BUILTIN_VARS_H | |
11 #define __CUDA_BUILTIN_VARS_H | |
12 | |
13 // Forward declares from vector_types.h. | |
14 struct uint3; | |
15 struct dim3; | |
16 | |
17 // The file implements built-in CUDA variables using __declspec(property). | |
18 // https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx | |
19 // All read accesses of built-in variable fields get converted into calls to a | |
20 // getter function which in turn calls the appropriate builtin to fetch the | |
21 // value. | |
22 // | |
23 // Example: | |
24 // int x = threadIdx.x; | |
25 // IR output: | |
26 // %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3 | |
27 // PTX output: | |
28 // mov.u32 %r2, %tid.x; | |
29 | |
30 #define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC) \ | |
31 __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD; \ | |
32 static inline __attribute__((always_inline)) \ | |
33 __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) { \ | |
34 return INTRINSIC; \ | |
35 } | |
36 | |
37 #if __cplusplus >= 201103L | |
38 #define __DELETE =delete | |
39 #else | |
40 #define __DELETE | |
41 #endif | |
42 | |
43 // Make sure nobody can create instances of the special variable types. nvcc | |
44 // also disallows taking address of special variables, so we disable address-of | |
45 // operator as well. | |
46 #define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName) \ | |
47 __attribute__((device)) TypeName() __DELETE; \ | |
48 __attribute__((device)) TypeName(const TypeName &) __DELETE; \ | |
49 __attribute__((device)) void operator=(const TypeName &) const __DELETE; \ | |
50 __attribute__((device)) TypeName *operator&() const __DELETE | |
51 | |
52 struct __cuda_builtin_threadIdx_t { | |
53 __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x()); | |
54 __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y()); | |
55 __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z()); | |
56 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a | |
57 // uint3). This function is defined after we pull in vector_types.h. | |
58 __attribute__((device)) operator uint3() const; | |
59 private: | |
60 __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t); | |
61 }; | |
62 | |
63 struct __cuda_builtin_blockIdx_t { | |
64 __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x()); | |
65 __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y()); | |
66 __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z()); | |
67 // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a | |
68 // uint3). This function is defined after we pull in vector_types.h. | |
69 __attribute__((device)) operator uint3() const; | |
70 private: | |
71 __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t); | |
72 }; | |
73 | |
74 struct __cuda_builtin_blockDim_t { | |
75 __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x()); | |
76 __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y()); | |
77 __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z()); | |
78 // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a | |
79 // dim3). This function is defined after we pull in vector_types.h. | |
80 __attribute__((device)) operator dim3() const; | |
81 private: | |
82 __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t); | |
83 }; | |
84 | |
85 struct __cuda_builtin_gridDim_t { | |
86 __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x()); | |
87 __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y()); | |
88 __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z()); | |
89 // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a | |
90 // dim3). This function is defined after we pull in vector_types.h. | |
91 __attribute__((device)) operator dim3() const; | |
92 private: | |
93 __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t); | |
94 }; | |
95 | |
96 #define __CUDA_BUILTIN_VAR \ | |
97 extern const __attribute__((device)) __attribute__((weak)) | |
98 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx; | |
99 __CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx; | |
100 __CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim; | |
101 __CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim; | |
102 | |
103 // warpSize should translate to read of %WARP_SZ but there's currently no | |
104 // builtin to do so. According to PTX v4.2 docs 'to date, all target | |
105 // architectures have a WARP_SZ value of 32'. | |
106 __attribute__((device)) const int warpSize = 32; | |
107 | |
108 #undef __CUDA_DEVICE_BUILTIN | |
109 #undef __CUDA_BUILTIN_VAR | |
110 #undef __CUDA_DISALLOW_BUILTINVAR_ACCESS | |
111 | |
112 #endif /* __CUDA_BUILTIN_VARS_H */ |