diff test/CodeGen/AMDGPU/llvm.amdgcn.class.ll @ 121:803732b1fca8
LLVM 5.0
| author | kono |
|---|---|
| date | Fri, 27 Oct 2017 17:07:41 +0900 |
| parents | 1172e4bd9c6f |
| children | |
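For reference, the same diff should be reproducible from a clone of this repository with `hg diff -c 803732b1fca8 test/CodeGen/AMDGPU/llvm.amdgcn.class.ll`.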
```diff
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll Fri Nov 25 19:14:25 2016 +0900
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll Fri Oct 27 17:07:41 2017 +0900
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
 declare i1 @llvm.amdgcn.class.f32(float, i32) #1
 declare i1 @llvm.amdgcn.class.f64(double, i32) #1
@@ -14,7 +14,7 @@
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define void @test_class_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
+define amdgpu_kernel void @test_class_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
   %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 %b) #1
   %sext = sext i1 %result to i32
   store i32 %sext, i32 addrspace(1)* %out, align 4
@@ -29,7 +29,7 @@
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define void @test_class_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
+define amdgpu_kernel void @test_class_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
   %a.fabs = call float @llvm.fabs.f32(float %a) #1
   %result = call i1 @llvm.amdgcn.class.f32(float %a.fabs, i32 %b) #1
   %sext = sext i1 %result to i32
@@ -45,7 +45,7 @@
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define void @test_class_fneg_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
+define amdgpu_kernel void @test_class_fneg_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
   %a.fneg = fsub float -0.0, %a
   %result = call i1 @llvm.amdgcn.class.f32(float %a.fneg, i32 %b) #1
   %sext = sext i1 %result to i32
@@ -61,7 +61,7 @@
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define void @test_class_fneg_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
+define amdgpu_kernel void @test_class_fneg_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
   %a.fabs = call float @llvm.fabs.f32(float %a) #1
   %a.fneg.fabs = fsub float -0.0, %a.fabs
   %result = call i1 @llvm.amdgcn.class.f32(float %a.fneg.fabs, i32 %b) #1
@@ -76,7 +76,7 @@
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[COND]]
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define void @test_class_1_f32(i32 addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @test_class_1_f32(i32 addrspace(1)* %out, float %a) #0 {
   %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1
   %sext = sext i1 %result to i32
   store i32 %sext, i32 addrspace(1)* %out, align 4
@@ -89,7 +89,7 @@
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[COND]]
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define void @test_class_64_f32(i32 addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @test_class_64_f32(i32 addrspace(1)* %out, float %a) #0 {
   %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 64) #1
   %sext = sext i1 %result to i32
   store i32 %sext, i32 addrspace(1)* %out, align 4
@@ -104,7 +104,7 @@
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define void @test_class_full_mask_f32(i32 addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @test_class_full_mask_f32(i32 addrspace(1)* %out, float %a) #0 {
   %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 1023) #1
   %sext = sext i1 %result to i32
   store i32 %sext, i32 addrspace(1)* %out, align 4
@@ -118,7 +118,7 @@
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define void @test_class_9bit_mask_f32(i32 addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @test_class_9bit_mask_f32(i32 addrspace(1)* %out, float %a) #0 {
   %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 511) #1
   %sext = sext i1 %result to i32
   store i32 %sext, i32 addrspace(1)* %out, align 4
@@ -132,7 +132,7 @@
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define void @v_test_class_full_mask_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_test_class_full_mask_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@@ -150,7 +150,7 @@
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define void @test_class_inline_imm_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_class_inline_imm_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@@ -170,7 +170,7 @@
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define void @test_class_lit_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_class_lit_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@@ -190,7 +190,7 @@
 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define void @test_class_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
+define amdgpu_kernel void @test_class_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
   %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 %b) #1
   %sext = sext i1 %result to i32
   store i32 %sext, i32 addrspace(1)* %out, align 4
@@ -205,7 +205,7 @@
 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define void @test_class_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
+define amdgpu_kernel void @test_class_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
   %a.fabs = call double @llvm.fabs.f64(double %a) #1
   %result = call i1 @llvm.amdgcn.class.f64(double %a.fabs, i32 %b) #1
   %sext = sext i1 %result to i32
@@ -221,7 +221,7 @@
 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define void @test_class_fneg_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
+define amdgpu_kernel void @test_class_fneg_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
   %a.fneg = fsub double -0.0, %a
   %result = call i1 @llvm.amdgcn.class.f64(double %a.fneg, i32 %b) #1
   %sext = sext i1 %result to i32
@@ -237,7 +237,7 @@
 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define void @test_class_fneg_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
+define amdgpu_kernel void @test_class_fneg_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
   %a.fabs = call double @llvm.fabs.f64(double %a) #1
   %a.fneg.fabs = fsub double -0.0, %a.fabs
   %result = call i1 @llvm.amdgcn.class.f64(double %a.fneg.fabs, i32 %b) #1
@@ -249,7 +249,7 @@
 ; SI-LABEL: {{^}}test_class_1_f64:
 ; SI: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 1{{$}}
 ; SI: s_endpgm
-define void @test_class_1_f64(i32 addrspace(1)* %out, double %a) #0 {
+define amdgpu_kernel void @test_class_1_f64(i32 addrspace(1)* %out, double %a) #0 {
   %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 1) #1
   %sext = sext i1 %result to i32
   store i32 %sext, i32 addrspace(1)* %out, align 4
@@ -259,7 +259,7 @@
 ; SI-LABEL: {{^}}test_class_64_f64:
 ; SI: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 64{{$}}
 ; SI: s_endpgm
-define void @test_class_64_f64(i32 addrspace(1)* %out, double %a) #0 {
+define amdgpu_kernel void @test_class_64_f64(i32 addrspace(1)* %out, double %a) #0 {
   %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 64) #1
   %sext = sext i1 %result to i32
   store i32 %sext, i32 addrspace(1)* %out, align 4
@@ -275,7 +275,7 @@
 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 {
+define amdgpu_kernel void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 {
   %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 511) #1
   %sext = sext i1 %result to i32
   store i32 %sext, i32 addrspace(1)* %out, align 4
@@ -290,7 +290,7 @@
 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@@ -306,7 +306,7 @@
 ; XSI: v_cmp_class_f64_e32 vcc, 1.0,
 ; SI: v_cmp_class_f64_e32 vcc,
 ; SI: s_endpgm
-define void @test_class_inline_imm_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_class_inline_imm_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@@ -321,7 +321,7 @@
 ; SI-LABEL: {{^}}test_class_lit_constant_dynamic_mask_f64:
 ; SI: v_cmp_class_f64_e32 vcc, s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
 ; SI: s_endpgm
-define void @test_class_lit_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_class_lit_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@@ -338,7 +338,7 @@
 ; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 3{{$}}
 ; SI-NOT: v_cmp_class
 ; SI: s_endpgm
-define void @test_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@@ -358,7 +358,7 @@
 ; SI: v_cmp_class_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 7{{$}}
 ; SI-NOT: v_cmp_class
 ; SI: s_endpgm
-define void @test_fold_or3_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_fold_or3_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@@ -381,7 +381,7 @@
 ; SI: v_cmp_class_f32_e32 vcc, v{{[0-9]+}}, [[MASK]]{{$}}
 ; SI-NOT: v_cmp_class
 ; SI: s_endpgm
-define void @test_fold_or_all_tests_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_fold_or_all_tests_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@@ -416,7 +416,7 @@
 ; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 12{{$}}
 ; SI-NOT: v_cmp_class
 ; SI: s_endpgm
-define void @test_fold_or_class_f32_1(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_fold_or_class_f32_1(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@@ -436,7 +436,7 @@
 ; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 7{{$}}
 ; SI-NOT: v_cmp_class
 ; SI: s_endpgm
-define void @test_fold_or_class_f32_2(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_fold_or_class_f32_2(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@@ -456,7 +456,7 @@
 ; SI-DAG: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 8{{$}}
 ; SI: s_or_b64
 ; SI: s_endpgm
-define void @test_no_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in, float %b) #0 {
+define amdgpu_kernel void @test_no_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in, float %b) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@@ -476,7 +476,7 @@
 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define void @test_class_0_f32(i32 addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @test_class_0_f32(i32 addrspace(1)* %out, float %a) #0 {
   %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 0) #1
   %sext = sext i1 %result to i32
   store i32 %sext, i32 addrspace(1)* %out, align 4
@@ -488,7 +488,7 @@
 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define void @test_class_0_f64(i32 addrspace(1)* %out, double %a) #0 {
+define amdgpu_kernel void @test_class_0_f64(i32 addrspace(1)* %out, double %a) #0 {
   %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 0) #1
   %sext = sext i1 %result to i32
   store i32 %sext, i32 addrspace(1)* %out, align 4
@@ -500,7 +500,7 @@
 ; SI-NOT: v_cmp_class
 ; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1,
 ; SI: buffer_store_dword
-define void @test_class_undef_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
+define amdgpu_kernel void @test_class_undef_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
   %result = call i1 @llvm.amdgcn.class.f32(float undef, i32 %b) #1
   %sext = sext i1 %result to i32
   store i32 %sext, i32 addrspace(1)* %out, align 4
```
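Two mechanical changes run through the whole file: every test entry point gains the `amdgpu_kernel` calling convention, and the RUN line adds `-amdgpu-scalarize-global-loads=false` so the buffer-load-based `; SI:` check lines keep matching. A minimal sketch of the first change, assuming an `llc` build with the AMDGPU target (the kernel name and store below are illustrative, not from the test file):

```llvm
; Illustrative only -- not part of the diff. `amdgpu_kernel` marks the
; function as a GPU kernel entry point; in this LLVM 5.0-era IR a plain
; `define void` is lowered as an ordinary callable function, so the
; kernel-shaped output the SI check lines expect (e.g. a trailing s_endpgm)
; would no longer be generated for it.
define amdgpu_kernel void @example_kernel(float addrspace(1)* %out) {
  store float 0.0, float addrspace(1)* %out, align 4  ; write to global memory
  ret void
}
```

Compiled with something like `llc -march=amdgcn < example.ll`, such a kernel should end in `s_endpgm`, which is the shape the `; SI: s_endpgm` patterns above assume.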