Mercurial > hg > Members > tobaru > cbc > CbC_llvm
comparison test/CodeGen/AMDGPU/coalescer_remat.ll @ 95:afa8332a0e37
LLVM 3.8
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 13 Oct 2015 17:48:58 +0900 |
parents | |
children | 803732b1fca8 |
comparison
equal
deleted
inserted
replaced
84:f3e34b893a5f | 95:afa8332a0e37 |
---|---|
1 ; RUN: llc -march=amdgcn -verify-machineinstrs -mtriple=amdgcn-- -o - %s | FileCheck %s | |
2 | |
3 declare float @llvm.fma.f32(float, float, float) | |
4 | |
5 ; This checks that the rematerialization support of the coalescer does not | |
6 ; unnecessarily widen the register class. Without those fixes, more than 20 | |
7 ; VGPRs are used here. | |
8 ; Also check that some rematerialization of the 0 constant happened. | |
9 ; CHECK-LABEL: foobar | |
10 ; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0 | |
11 ; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0 | |
12 ; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0 | |
13 ; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0 | |
14 ; It's probably OK if this is slightly higher: | |
15 ; CHECK: ; NumVgprs: 9 | |
; Test kernel: when %flag == 1, runs a loop that keeps four float accumulators | |
; (all starting at 0.0), updating each one with an fma against one lane of the | |
; <4 x float> loaded from %in; the four final values are then stored to %out as | |
; one <4 x float>. When %flag != 1 the loop is skipped and zeros are stored. | |
16 define void @foobar(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in, i32 %flag) { | |
17 entry: | |
; Enter the loop only when %flag equals 1; otherwise go straight to %exit. | |
18 %cmpflag = icmp eq i32 %flag, 1 | |
19 br i1 %cmpflag, label %loop, label %exit | |
20 | |
21 loop: | |
; Loop-carried state: counter %c and accumulators %v0..%v3, each 0 on entry | |
; from %entry and taken from the previous iteration on the back edge. | |
22 %c = phi i32 [0, %entry], [%cnext, %loop] | |
23 %v0 = phi float [0.0, %entry], [%fma.0, %loop] | |
24 %v1 = phi float [0.0, %entry], [%fma.1, %loop] | |
25 %v2 = phi float [0.0, %entry], [%fma.2, %loop] | |
26 %v3 = phi float [0.0, %entry], [%fma.3, %loop] | |
27 | |
28 ; Try to get the 0 constant to get coalesced into a wide register | |
; Only lane 0 of the stored vector is defined (%v0); lanes 1-3 stay undef. | |
29 %blup = insertelement <4 x float> undef, float %v0, i32 0 | |
30 store <4 x float> %blup, <4 x float> addrspace(1)* %out | |
31 | |
; Load one <4 x float> from %in and split it into its four lanes. | |
32 %load = load <4 x float>, <4 x float> addrspace(1)* %in | |
33 %load.0 = extractelement <4 x float> %load, i32 0 | |
34 %load.1 = extractelement <4 x float> %load, i32 1 | |
35 %load.2 = extractelement <4 x float> %load, i32 2 | |
36 %load.3 = extractelement <4 x float> %load, i32 3 | |
; Per-lane update: fma.N = vN * load.N + vN. | |
37 %fma.0 = call float @llvm.fma.f32(float %v0, float %load.0, float %v0) | |
38 %fma.1 = call float @llvm.fma.f32(float %v1, float %load.1, float %v1) | |
39 %fma.2 = call float @llvm.fma.f32(float %v2, float %load.2, float %v2) | |
40 %fma.3 = call float @llvm.fma.f32(float %v3, float %load.3, float %v3) | |
41 | |
; Advance the counter and leave the loop once the incremented value is 42. | |
42 %cnext = add nsw i32 %c, 1 | |
43 %cmp = icmp eq i32 %cnext, 42 | |
44 br i1 %cmp, label %exit, label %loop | |
45 | |
46 exit: | |
; Result values: 0.0 when arriving directly from %entry, otherwise the fma | |
; results of the last loop iteration. | |
47 %ev0 = phi float [0.0, %entry], [%fma.0, %loop] | |
48 %ev1 = phi float [0.0, %entry], [%fma.1, %loop] | |
49 %ev2 = phi float [0.0, %entry], [%fma.2, %loop] | |
50 %ev3 = phi float [0.0, %entry], [%fma.3, %loop] | |
; Assemble the four results into one vector and write it to %out. | |
51 %dst.0 = insertelement <4 x float> undef, float %ev0, i32 0 | |
52 %dst.1 = insertelement <4 x float> %dst.0, float %ev1, i32 1 | |
53 %dst.2 = insertelement <4 x float> %dst.1, float %ev2, i32 2 | |
54 %dst.3 = insertelement <4 x float> %dst.2, float %ev3, i32 3 | |
55 store <4 x float> %dst.3, <4 x float> addrspace(1)* %out | |
56 ret void | |
57 } |