Mercurial > hg > Members > tobaru > cbc > CbC_llvm
comparison test/CodeGen/AMDGPU/srem.ll @ 121:803732b1fca8
LLVM 5.0
author | kono |
---|---|
date | Fri, 27 Oct 2017 17:07:41 +0900 |
parents | afa8332a0e37 |
children |
comparison
equal
deleted
inserted
replaced
120:1172e4bd9c6f | 121:803732b1fca8 |
---|---|
1 ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s | 1 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s |
2 ; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s | 2 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s |
3 ; RUN: llc -march=r600 -mcpu=redwood < %s | 3 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s |
4 | 4 |
5 define void @srem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { | 5 define amdgpu_kernel void @srem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { |
6 %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 | 6 %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 |
7 %num = load i32, i32 addrspace(1) * %in | 7 %num = load i32, i32 addrspace(1) * %in |
8 %den = load i32, i32 addrspace(1) * %den_ptr | 8 %den = load i32, i32 addrspace(1) * %den_ptr |
9 %result = srem i32 %num, %den | 9 %result = srem i32 %num, %den |
10 store i32 %result, i32 addrspace(1)* %out | 10 store i32 %result, i32 addrspace(1)* %out |
11 ret void | 11 ret void |
12 } | 12 } |
13 | 13 |
14 define void @srem_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { | 14 define amdgpu_kernel void @srem_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { |
15 %num = load i32, i32 addrspace(1) * %in | 15 %num = load i32, i32 addrspace(1) * %in |
16 %result = srem i32 %num, 4 | 16 %result = srem i32 %num, 4 |
17 store i32 %result, i32 addrspace(1)* %out | 17 store i32 %result, i32 addrspace(1)* %out |
18 ret void | 18 ret void |
19 } | 19 } |
20 | 20 |
21 ; FUNC-LABEL: {{^}}srem_i32_7: | 21 ; FUNC-LABEL: {{^}}srem_i32_7: |
22 ; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x92492493 | 22 ; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x92492493 |
23 ; SI: v_mul_hi_i32 {{v[0-9]+}}, [[MAGIC]], | 23 ; SI: v_mul_hi_i32 {{v[0-9]+}}, {{v[0-9]+}}, [[MAGIC]] |
24 ; SI: v_mul_lo_i32 | 24 ; SI: v_mul_lo_i32 |
25 ; SI: v_sub_i32 | 25 ; SI: v_sub_i32 |
26 ; SI: s_endpgm | 26 ; SI: s_endpgm |
27 define void @srem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { | 27 define amdgpu_kernel void @srem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { |
28 %num = load i32, i32 addrspace(1) * %in | 28 %num = load i32, i32 addrspace(1) * %in |
29 %result = srem i32 %num, 7 | 29 %result = srem i32 %num, 7 |
30 store i32 %result, i32 addrspace(1)* %out | 30 store i32 %result, i32 addrspace(1)* %out |
31 ret void | 31 ret void |
32 } | 32 } |
33 | 33 |
34 define void @srem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { | 34 define amdgpu_kernel void @srem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { |
35 %den_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 | 35 %den_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 |
36 %num = load <2 x i32>, <2 x i32> addrspace(1) * %in | 36 %num = load <2 x i32>, <2 x i32> addrspace(1) * %in |
37 %den = load <2 x i32>, <2 x i32> addrspace(1) * %den_ptr | 37 %den = load <2 x i32>, <2 x i32> addrspace(1) * %den_ptr |
38 %result = srem <2 x i32> %num, %den | 38 %result = srem <2 x i32> %num, %den |
39 store <2 x i32> %result, <2 x i32> addrspace(1)* %out | 39 store <2 x i32> %result, <2 x i32> addrspace(1)* %out |
40 ret void | 40 ret void |
41 } | 41 } |
42 | 42 |
43 define void @srem_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { | 43 define amdgpu_kernel void @srem_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { |
44 %num = load <2 x i32>, <2 x i32> addrspace(1) * %in | 44 %num = load <2 x i32>, <2 x i32> addrspace(1) * %in |
45 %result = srem <2 x i32> %num, <i32 4, i32 4> | 45 %result = srem <2 x i32> %num, <i32 4, i32 4> |
46 store <2 x i32> %result, <2 x i32> addrspace(1)* %out | 46 store <2 x i32> %result, <2 x i32> addrspace(1)* %out |
47 ret void | 47 ret void |
48 } | 48 } |
49 | 49 |
50 define void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { | 50 define amdgpu_kernel void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { |
51 %den_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 | 51 %den_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 |
52 %num = load <4 x i32>, <4 x i32> addrspace(1) * %in | 52 %num = load <4 x i32>, <4 x i32> addrspace(1) * %in |
53 %den = load <4 x i32>, <4 x i32> addrspace(1) * %den_ptr | 53 %den = load <4 x i32>, <4 x i32> addrspace(1) * %den_ptr |
54 %result = srem <4 x i32> %num, %den | 54 %result = srem <4 x i32> %num, %den |
55 store <4 x i32> %result, <4 x i32> addrspace(1)* %out | 55 store <4 x i32> %result, <4 x i32> addrspace(1)* %out |
56 ret void | 56 ret void |
57 } | 57 } |
58 | 58 |
59 define void @srem_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { | 59 define amdgpu_kernel void @srem_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { |
60 %num = load <4 x i32>, <4 x i32> addrspace(1) * %in | 60 %num = load <4 x i32>, <4 x i32> addrspace(1) * %in |
61 %result = srem <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4> | 61 %result = srem <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4> |
62 store <4 x i32> %result, <4 x i32> addrspace(1)* %out | 62 store <4 x i32> %result, <4 x i32> addrspace(1)* %out |
63 ret void | 63 ret void |
64 } | 64 } |
65 | 65 |
66 define void @srem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { | 66 define amdgpu_kernel void @srem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { |
67 %den_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1 | 67 %den_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1 |
68 %num = load i64, i64 addrspace(1) * %in | 68 %num = load i64, i64 addrspace(1) * %in |
69 %den = load i64, i64 addrspace(1) * %den_ptr | 69 %den = load i64, i64 addrspace(1) * %den_ptr |
70 %result = srem i64 %num, %den | 70 %result = srem i64 %num, %den |
71 store i64 %result, i64 addrspace(1)* %out | 71 store i64 %result, i64 addrspace(1)* %out |
72 ret void | 72 ret void |
73 } | 73 } |
74 | 74 |
75 define void @srem_i64_4(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { | 75 define amdgpu_kernel void @srem_i64_4(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { |
76 %num = load i64, i64 addrspace(1) * %in | 76 %num = load i64, i64 addrspace(1) * %in |
77 %result = srem i64 %num, 4 | 77 %result = srem i64 %num, 4 |
78 store i64 %result, i64 addrspace(1)* %out | 78 store i64 %result, i64 addrspace(1)* %out |
79 ret void | 79 ret void |
80 } | 80 } |
81 | 81 |
82 define void @srem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { | 82 define amdgpu_kernel void @srem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { |
83 %den_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1 | 83 %den_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1 |
84 %num = load <2 x i64>, <2 x i64> addrspace(1) * %in | 84 %num = load <2 x i64>, <2 x i64> addrspace(1) * %in |
85 %den = load <2 x i64>, <2 x i64> addrspace(1) * %den_ptr | 85 %den = load <2 x i64>, <2 x i64> addrspace(1) * %den_ptr |
86 %result = srem <2 x i64> %num, %den | 86 %result = srem <2 x i64> %num, %den |
87 store <2 x i64> %result, <2 x i64> addrspace(1)* %out | 87 store <2 x i64> %result, <2 x i64> addrspace(1)* %out |
88 ret void | 88 ret void |
89 } | 89 } |
90 | 90 |
91 define void @srem_v2i64_4(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { | 91 define amdgpu_kernel void @srem_v2i64_4(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { |
92 %num = load <2 x i64>, <2 x i64> addrspace(1) * %in | 92 %num = load <2 x i64>, <2 x i64> addrspace(1) * %in |
93 %result = srem <2 x i64> %num, <i64 4, i64 4> | 93 %result = srem <2 x i64> %num, <i64 4, i64 4> |
94 store <2 x i64> %result, <2 x i64> addrspace(1)* %out | 94 store <2 x i64> %result, <2 x i64> addrspace(1)* %out |
95 ret void | 95 ret void |
96 } | 96 } |
97 | 97 |
98 define void @srem_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { | 98 define amdgpu_kernel void @srem_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { |
99 %den_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1 | 99 %den_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1 |
100 %num = load <4 x i64>, <4 x i64> addrspace(1) * %in | 100 %num = load <4 x i64>, <4 x i64> addrspace(1) * %in |
101 %den = load <4 x i64>, <4 x i64> addrspace(1) * %den_ptr | 101 %den = load <4 x i64>, <4 x i64> addrspace(1) * %den_ptr |
102 %result = srem <4 x i64> %num, %den | 102 %result = srem <4 x i64> %num, %den |
103 store <4 x i64> %result, <4 x i64> addrspace(1)* %out | 103 store <4 x i64> %result, <4 x i64> addrspace(1)* %out |
104 ret void | 104 ret void |
105 } | 105 } |
106 | 106 |
107 define void @srem_v4i64_4(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { | 107 define amdgpu_kernel void @srem_v4i64_4(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { |
108 %num = load <4 x i64>, <4 x i64> addrspace(1) * %in | 108 %num = load <4 x i64>, <4 x i64> addrspace(1) * %in |
109 %result = srem <4 x i64> %num, <i64 4, i64 4, i64 4, i64 4> | 109 %result = srem <4 x i64> %num, <i64 4, i64 4, i64 4, i64 4> |
110 store <4 x i64> %result, <4 x i64> addrspace(1)* %out | 110 store <4 x i64> %result, <4 x i64> addrspace(1)* %out |
111 ret void | 111 ret void |
112 } | 112 } |