Members/tobaru/cbc/CbC_llvm: test/CodeGen/AMDGPU/clamp-modifier.ll annotate

annotate test/CodeGen/AMDGPU/clamp-modifier.ll @ 128:c347d3398279 default tip

fix

author	mir3636
date	Wed, 06 Dec 2017 14:37:17 +0900
parents	803732b1fca8
children

rev	line source
121 803732b1fca8 LLVM 5.0 kono parents: diff changeset	1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
803732b1fca8 LLVM 5.0 kono parents: diff changeset	2 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s \| FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s
803732b1fca8 LLVM 5.0 kono parents: diff changeset	3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s \| FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s
803732b1fca8 LLVM 5.0 kono parents: diff changeset	4
; Single-use case: %add feeds only the maxnum(..., 0.0) / minnum(..., 1.0) pair.
; The checks expect one v_add_f32_e64 carrying the clamp modifier, and the
; loaded register must not be mentioned between the load and that add.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	5 ; GCN-LABEL: {{^}}v_clamp_add_src_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	6 ; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	7 ; GCN-NOT: [[A]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	8 ; GCN: v_add_f32_e64 v{{[0-9]+}}, [[A]], 1.0 clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	9 define amdgpu_kernel void @v_clamp_add_src_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	10 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	11 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	12 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	13 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	14 %add = fadd float %a, 1.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	15 %max = call float @llvm.maxnum.f32(float %add, float 0.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	16 %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	17 store float %clamp, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	18 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	19 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	20
; Multi-use case: %add is also written out by a volatile store, so it has a
; second user besides the clamp. The checks expect a plain v_add_f32_e32 (no
; modifier, line ends after the operands) followed by a separate v_max_f32_e64
; of the add result with itself that carries the clamp.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	21 ; GCN-LABEL: {{^}}v_clamp_multi_use_src_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	22 ; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	23 ; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], 1.0, [[A]]{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	24 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[ADD]], [[ADD]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	25 define amdgpu_kernel void @v_clamp_multi_use_src_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	26 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	27 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	28 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	29 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	30 %add = fadd float %a, 1.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	31 %max = call float @llvm.maxnum.f32(float %add, float 0.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	32 %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	33 store float %clamp, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	34 store volatile float %add, float addrspace(1)* undef
803732b1fca8 LLVM 5.0 kono parents: diff changeset	35 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	36 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	37
; Debug-use case: the only extra reference to %add is an llvm.dbg.value call.
; The expected output is the same as for the single-use test: the clamp is
; attached directly to v_add_f32_e64, and the loaded register is not mentioned
; between the load and the add.
; The metadata nodes !4, !9 and !10 are defined outside this block.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	38 ; GCN-LABEL: {{^}}v_clamp_dbg_use_src_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	39 ; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	40 ; GCN-NOT: [[A]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	41 ; GCN: v_add_f32_e64 v{{[0-9]+}}, [[A]], 1.0 clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	42 define amdgpu_kernel void @v_clamp_dbg_use_src_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	43 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	44 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	45 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	46 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	47 %add = fadd float %a, 1.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	48 call void @llvm.dbg.value(metadata float %add, i64 0, metadata !4, metadata !9), !dbg !10
803732b1fca8 LLVM 5.0 kono parents: diff changeset	49 %max = call float @llvm.maxnum.f32(float %add, float 0.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	50 %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	51 store float %clamp, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	52 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	53 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	54
; Negated-source case: the clamped value is (-0.0 - floor(%a)). The checks
; expect v_floor_f32_e32 to stay unmodified, followed by a v_max_f32_e64 whose
; two source operands are both the negated floor result and which carries the
; clamp.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	55 ; GCN-LABEL: {{^}}v_clamp_add_neg_src_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	56 ; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	57 ; GCN: v_floor_f32_e32 [[FLOOR:v[0-9]+]], [[A]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	58 ; GCN: v_max_f32_e64 v{{[0-9]+}}, -[[FLOOR]], -[[FLOOR]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	59 define amdgpu_kernel void @v_clamp_add_neg_src_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	60 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	61 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	62 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	63 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	64 %floor = call float @llvm.floor.f32(float %a)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	65 %neg.floor = fsub float -0.0, %floor
803732b1fca8 LLVM 5.0 kono parents: diff changeset	66 %max = call float @llvm.maxnum.f32(float %neg.floor, float 0.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	67 %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	68 store float %clamp, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	69 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	70 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	71
; Negative test: only maxnum(..., 0.0) is applied, with no minnum against 1.0.
; Both expected instructions are anchored at end-of-line right after their
; operands, so neither the add nor the max may carry a clamp suffix.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	72 ; GCN-LABEL: {{^}}v_non_clamp_max_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	73 ; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	74 ; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], 1.0, [[A]]{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	75 ; GCN: v_max_f32_e32 v{{[0-9]+}}, 0, [[ADD]]{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	76 define amdgpu_kernel void @v_non_clamp_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	77 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	78 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	79 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	80 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	81 %add = fadd float %a, 1.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	82 %max = call float @llvm.maxnum.f32(float %add, float 0.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	83 store float %max, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	84 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	85 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	86
; Same IR body as the single-use f32 test, but compiled under attribute group
; #2 instead of #0. The checks still expect the clamp on v_add_f32_e64.
; NOTE(review): #2 is defined outside this view; judging by the test name it
; presumably enables f32 denormals - confirm against the attribute list.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	87 ; GCN-LABEL: {{^}}v_clamp_add_src_f32_denormals:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	88 ; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	89 ; GCN: v_add_f32_e64 [[ADD:v[0-9]+]], [[A]], 1.0 clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	90 define amdgpu_kernel void @v_clamp_add_src_f32_denormals(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	91 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	92 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	93 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	94 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	95 %add = fadd float %a, 1.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	96 %max = call float @llvm.maxnum.f32(float %add, float 0.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	97 %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	98 store float %clamp, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	99 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	100 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	101
; Half-precision variant of the single-use test, under attribute group #0.
; Runs using the GFX89 prefix expect a native v_add_f16_e64 with clamp.
; Runs using the SI prefix expect the value to be converted to f32, added with
; clamp as v_add_f32_e64, and converted back to f16.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	102 ; GCN-LABEL: {{^}}v_clamp_add_src_f16_denorm:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	103 ; GCN: {{buffer\|flat\|global}}_load_ushort [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	104 ; GFX89: v_add_f16_e64 [[ADD:v[0-9]+]], [[A]], 1.0 clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	105
803732b1fca8 LLVM 5.0 kono parents: diff changeset	106 ; SI: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[A]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	107 ; SI: v_add_f32_e64 [[ADD:v[0-9]+]], [[CVT]], 1.0 clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	108 ; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[ADD]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	109 define amdgpu_kernel void @v_clamp_add_src_f16_denorm(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	110 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	111 %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	112 %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	113 %a = load half, half addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	114 %add = fadd half %a, 1.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	115 %max = call half @llvm.maxnum.f16(half %add, half 0.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	116 %clamp = call half @llvm.minnum.f16(half %max, half 1.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	117 store half %clamp, half addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	118 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	119 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	120
; Half-precision single-use test under attribute group #3.
; Runs using the GFX89 prefix expect v_add_f16_e64 with clamp, and the loaded
; register must not be mentioned between the load and that add.
; Runs using the SI prefix expect the f32 convert / add-with-clamp / convert-back
; sequence.
; NOTE(review): #3 is defined outside this view; the test name suggests it
; disables denormals - confirm against the attribute list.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	121 ; GCN-LABEL: {{^}}v_clamp_add_src_f16_no_denormals:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	122 ; GCN: {{buffer\|flat\|global}}_load_ushort [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	123 ; GFX89-NOT: [[A]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	124 ; GFX89: v_add_f16_e64 v{{[0-9]+}}, [[A]], 1.0 clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	125
803732b1fca8 LLVM 5.0 kono parents: diff changeset	126 ; SI: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[A]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	127 ; SI: v_add_f32_e64 [[ADD:v[0-9]+]], [[CVT]], 1.0 clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	128 ; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[ADD]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	129 define amdgpu_kernel void @v_clamp_add_src_f16_no_denormals(half addrspace(1)* %out, half addrspace(1)* %aptr) #3 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	130 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	131 %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	132 %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	133 %a = load half, half addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	134 %add = fadd half %a, 1.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	135 %max = call half @llvm.maxnum.f16(half %add, half 0.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	136 %clamp = call half @llvm.minnum.f16(half %max, half 1.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	137 store half %clamp, half addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	138 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	139 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	140
; <2 x float> variant: the vector is loaded as one dwordx2 register pair, whose
; low and high register numbers are captured as A and B. The checks expect one
; clamped v_add_f32_e64 per element, in either order (DAG directives).
803732b1fca8 LLVM 5.0 kono parents: diff changeset	141 ; GCN-LABEL: {{^}}v_clamp_add_src_v2f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	142 ; GCN: {{buffer\|flat\|global}}_load_dwordx2 v{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	143 ; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, v[[A]], 1.0 clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	144 ; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, v[[B]], 1.0 clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	145 define amdgpu_kernel void @v_clamp_add_src_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	146 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	147 %gep0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	148 %out.gep = getelementptr <2 x float>, <2 x float> addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	149 %a = load <2 x float>, <2 x float> addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	150 %add = fadd <2 x float> %a, <float 1.0, float 1.0>
803732b1fca8 LLVM 5.0 kono parents: diff changeset	151 %max = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %add, <2 x float> zeroinitializer)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	152 %clamp = call <2 x float> @llvm.minnum.v2f32(<2 x float> %max, <2 x float> <float 1.0, float 1.0>)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	153 store <2 x float> %clamp, <2 x float> addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	154 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	155 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	156
; Double-precision variant: the value is loaded as a dwordx2 register pair and
; the checks expect a single v_add_f64 on that pair carrying the clamp.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	157 ; GCN-LABEL: {{^}}v_clamp_add_src_f64:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	158 ; GCN: {{buffer\|flat\|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	159 ; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, [[A]], 1.0 clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	160 define amdgpu_kernel void @v_clamp_add_src_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	161 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	162 %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	163 %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	164 %a = load double, double addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	165 %add = fadd double %a, 1.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	166 %max = call double @llvm.maxnum.f64(double %add, double 0.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	167 %clamp = call double @llvm.minnum.f64(double %max, double 1.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	168 store double %clamp, double addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	169 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	170 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	171
; Multiply-add case: %a is a kernel argument (matched as SGPR operands) and %b
; is loaded from memory. (%a * %a) + %b is clamped to [0.0, 1.0] and the result
; is then added to %b again. The check expects a single three-operand v_mad_f32
; that carries the clamp.
; The addend pattern accepts a VGPR of any number of digits, like every other
; register pattern in this file, so the check does not depend on the register
; allocator picking v0-v9.
; NOTE(review): per the test name this presumably guards against selecting the
; two-address v_mac_f32 form - confirm.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	172 ; GCN-LABEL: {{^}}v_clamp_mac_to_mad:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	173 ; GCN: v_mad_f32 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	174 define amdgpu_kernel void @v_clamp_mac_to_mad(float addrspace(1)* %out, float addrspace(1)* %aptr, float %a) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	175 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	176 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	177 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	178 %b = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	179
803732b1fca8 LLVM 5.0 kono parents: diff changeset	180 %mul = fmul float %a, %a
803732b1fca8 LLVM 5.0 kono parents: diff changeset	181 %add = fadd float %mul, %b
803732b1fca8 LLVM 5.0 kono parents: diff changeset	182 %max = call float @llvm.maxnum.f32(float %add, float 0.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	183 %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	184 %res = fadd float %clamp, %b
803732b1fca8 LLVM 5.0 kono parents: diff changeset	185 store float %res, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	186 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	187 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	188
803732b1fca8 LLVM 5.0 kono parents: diff changeset	189
; Packed <2 x half> variant under attribute group #0. Only the GFX9 prefix has
; instruction expectations: the constant 0x3c003c00 (half 1.0 in both 16-bit
; lanes) is moved into an SGPR and a single v_pk_add_f16 carries the clamp.
; The load and the s_mov may appear in either order (DAG directives).
803732b1fca8 LLVM 5.0 kono parents: diff changeset	190 ; GCN-LABEL: {{^}}v_clamp_add_src_v2f16_denorm:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	191 ; GCN-DAG: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	192 ; GFX9-DAG: s_mov_b32 [[ONE:s[0-9]+]], 0x3c003c00
803732b1fca8 LLVM 5.0 kono parents: diff changeset	193 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], [[ONE]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	194 define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	195 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	196 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	197 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	198 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	199 %add = fadd <2 x half> %a, <half 1.0, half 1.0>
803732b1fca8 LLVM 5.0 kono parents: diff changeset	200 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %add, <2 x half> zeroinitializer)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	201 %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	202 store <2 x half> %clamp, <2 x half> addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	203 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	204 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	205
; Same packed <2 x half> body as the _denorm test, compiled under attribute
; group #3 instead of #0. The GFX9 expectations are identical: an SGPR holding
; 0x3c003c00 and one v_pk_add_f16 carrying the clamp.
; NOTE(review): #3 is defined outside this view; the test name suggests it
; disables denormals - confirm against the attribute list.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	206 ; GCN-LABEL: {{^}}v_clamp_add_src_v2f16_no_denormals:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	207 ; GCN-DAG: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	208 ; GFX9-DAG: s_mov_b32 [[ONE:s[0-9]+]], 0x3c003c00
803732b1fca8 LLVM 5.0 kono parents: diff changeset	209 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], [[ONE]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	210 define amdgpu_kernel void @v_clamp_add_src_v2f16_no_denormals(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #3 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	211 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	212 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	213 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	214 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	215 %add = fadd <2 x half> %a, <half 1.0, half 1.0>
803732b1fca8 LLVM 5.0 kono parents: diff changeset	216 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %add, <2 x half> zeroinitializer)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	217 %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	218 store <2 x half> %clamp, <2 x half> addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	219 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	220 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	221
; Both lanes negated: the clamped value is (-0.0 - %add) on the whole vector.
; The GFX9 expectations are an unclamped v_pk_add_f16 (line ends after the
; operands) followed by a v_pk_max_f16 of the add result with itself, with
; neg_lo:[1,1] and neg_hi:[1,1] set and the clamp attached.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	222 ; GCN-LABEL: {{^}}v_clamp_add_src_v2f16_denorm_neg:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	223 ; GCN-DAG: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	224 ; GFX9-DAG: s_mov_b32 [[ONE:s[0-9]+]], 0x3c003c00
803732b1fca8 LLVM 5.0 kono parents: diff changeset	225 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], [[ONE]]{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	226 ; GFX9: v_pk_max_f16 [[MAX:v[0-9]+]], [[ADD]], [[ADD]] neg_lo:[1,1] neg_hi:[1,1] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	227 define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	228 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	229 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	230 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	231 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	232 %add = fadd <2 x half> %a, <half 1.0, half 1.0>
803732b1fca8 LLVM 5.0 kono parents: diff changeset	233 %neg.add = fsub <2 x half> <half -0.0, half -0.0>, %add
803732b1fca8 LLVM 5.0 kono parents: diff changeset	234 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %neg.add, <2 x half> zeroinitializer)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	235 %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	236 store <2 x half> %clamp, <2 x half> addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	237 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	238 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	239
; Low lane negated only: element 0 of %add is extracted, negated and inserted
; back before the clamp. The GFX9 expectations are an unclamped v_pk_add_f16
; followed by a v_pk_max_f16 with neg_lo:[1,1] (and no neg_hi) plus the clamp.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	240 ; GCN-LABEL: {{^}}v_clamp_add_src_v2f16_denorm_neg_lo:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	241 ; GCN-DAG: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	242 ; GFX9-DAG: s_mov_b32 [[ONE:s[0-9]+]], 0x3c003c00
803732b1fca8 LLVM 5.0 kono parents: diff changeset	243 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], [[ONE]]{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	244 ; GFX9: v_pk_max_f16 [[MAX:v[0-9]+]], [[ADD]], [[ADD]] neg_lo:[1,1] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	245 define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg_lo(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	246 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	247 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	248 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	249 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	250 %add = fadd <2 x half> %a, <half 1.0, half 1.0>
803732b1fca8 LLVM 5.0 kono parents: diff changeset	251 %lo = extractelement <2 x half> %add, i32 0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	252 %neg.lo = fsub half -0.0, %lo
803732b1fca8 LLVM 5.0 kono parents: diff changeset	253 %neg.lo.add = insertelement <2 x half> %add, half %neg.lo, i32 0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	254 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %neg.lo.add, <2 x half> zeroinitializer)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	255 %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	256 store <2 x half> %clamp, <2 x half> addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	257 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	258 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	259
; High lane negated only: element 1 of %add is extracted, negated and inserted
; back before the clamp. The GFX9 expectations are an unclamped v_pk_add_f16
; followed by a v_pk_max_f16 with neg_hi:[1,1] (and no neg_lo) plus the clamp.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	260 ; GCN-LABEL: {{^}}v_clamp_add_src_v2f16_denorm_neg_hi:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	261 ; GCN-DAG: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	262 ; GFX9-DAG: s_mov_b32 [[ONE:s[0-9]+]], 0x3c003c00
803732b1fca8 LLVM 5.0 kono parents: diff changeset	263 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], [[ONE]]{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	264 ; GFX9: v_pk_max_f16 [[MAX:v[0-9]+]], [[ADD]], [[ADD]] neg_hi:[1,1] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	265 define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg_hi(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	266 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	267 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	268 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	269 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	270 %add = fadd <2 x half> %a, <half 1.0, half 1.0>
803732b1fca8 LLVM 5.0 kono parents: diff changeset	271 %hi = extractelement <2 x half> %add, i32 1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	272 %neg.hi = fsub half -0.0, %hi
803732b1fca8 LLVM 5.0 kono parents: diff changeset	273 %neg.hi.add = insertelement <2 x half> %add, half %neg.hi, i32 1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	274 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %neg.hi.add, <2 x half> zeroinitializer)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	275 %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	276 store <2 x half> %clamp, <2 x half> addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	277 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	278 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	279
; Lane-swap case: a shufflevector with mask <1, 0> exchanges the two halves of
; %add before the clamp. The GFX9 expectations are an unclamped v_pk_add_f16
; followed by a v_pk_max_f16 with op_sel:[1,1] op_sel_hi:[0,0] plus the clamp.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	280 ; GCN-LABEL: {{^}}v_clamp_add_src_v2f16_denorm_shuf:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	281 ; GCN-DAG: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	282 ; GFX9-DAG: s_mov_b32 [[ONE:s[0-9]+]], 0x3c003c00
803732b1fca8 LLVM 5.0 kono parents: diff changeset	283 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], [[ONE]]{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	284 ; GFX9: v_pk_max_f16 [[MAX:v[0-9]+]], [[ADD]], [[ADD]] op_sel:[1,1] op_sel_hi:[0,0] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	285 define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_shuf(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	286 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	287 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	288 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	289 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	290 %add = fadd <2 x half> %a, <half 1.0, half 1.0>
803732b1fca8 LLVM 5.0 kono parents: diff changeset	291 %shuf = shufflevector <2 x half> %add, <2 x half> undef, <2 x i32> <i32 1, i32 0>
803732b1fca8 LLVM 5.0 kono parents: diff changeset	292
803732b1fca8 LLVM 5.0 kono parents: diff changeset	293 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %shuf, <2 x half> zeroinitializer)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	294 %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	295 store <2 x half> %clamp, <2 x half> addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	296 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	297 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	298
; Negative test for mismatched types: the value being clamped as <2 x half> is
; produced by an f32 fadd and only bitcast to the packed type. The GFX9
; expectations are an unclamped v_add_f32_e32 (line ends after the operands)
; and a separate v_pk_max_f16 that carries the clamp.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	299 ; GCN-LABEL: {{^}}v_no_clamp_add_src_v2f16_f32_src:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	300 ; GCN-DAG: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	301 ; GFX9: v_add_f32_e32 [[ADD:v[0-9]+]], 1.0, [[A]]{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	302 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[ADD]], [[ADD]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	303 define amdgpu_kernel void @v_no_clamp_add_src_v2f16_f32_src(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	304 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	305 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	306 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	307 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	308 %bc = bitcast <2 x half> %a to float
803732b1fca8 LLVM 5.0 kono parents: diff changeset	309 %f32.op = fadd float %bc, 1.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	310 %f32.op.cast = bitcast float %f32.op to <2 x half>
803732b1fca8 LLVM 5.0 kono parents: diff changeset	311 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %f32.op.cast, <2 x half> zeroinitializer)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	312 %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	313 store <2 x half> %clamp, <2 x half> addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	314 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	315 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	316
803732b1fca8 LLVM 5.0 kono parents: diff changeset	317 ; GCN-LABEL: {{^}}v_no_clamp_add_packed_src_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	318 ; GCN-DAG: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	319 ; GFX9-DAG: s_mov_b32 [[ONE:s[0-9]+]], 0x3c003c00
803732b1fca8 LLVM 5.0 kono parents: diff changeset	320 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], [[ONE]]{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	321 ; GFX9: v_max_f32_e64 [[CLAMP:v[0-9]+]], [[ADD]], [[ADD]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	322 define amdgpu_kernel void @v_no_clamp_add_packed_src_f32(float addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	323 %tid = call i32 @llvm.amdgcn.workitem.id.x() ; used as the element index for both GEPs below
803732b1fca8 LLVM 5.0 kono parents: diff changeset	324 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	325 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	326 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	327 %add = fadd <2 x half> %a, <half 1.0, half 1.0> ; source op is a packed v2f16 add
803732b1fca8 LLVM 5.0 kono parents: diff changeset	328 %bc.add = bitcast <2 x half> %add to float ; reinterpret the packed result as one f32
803732b1fca8 LLVM 5.0 kono parents: diff changeset	329 %max = call float @llvm.maxnum.f32(float %bc.add, float 0.0) ; lower bound 0.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	330 %clamp = call float @llvm.minnum.f32(float %max, float 1.0) ; upper bound 1.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	331 store float %clamp, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	332 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	333 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	334
803732b1fca8 LLVM 5.0 kono parents: diff changeset	335 ; Since the high bits are zeroed, it probably would be OK in this case
803732b1fca8 LLVM 5.0 kono parents: diff changeset	336 ; to use clamp.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	337 ; GCN-LABEL: {{^}}v_no_clamp_add_src_v2f16_f16_src:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	338 ; GCN-DAG: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	339 ; GFX9: v_add_f16_e32 [[ADD:v[0-9]+]], 1.0, [[A]]{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	340 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[ADD]], [[ADD]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	341 define amdgpu_kernel void @v_no_clamp_add_src_v2f16_f16_src(<2 x half> addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	342 %tid = call i32 @llvm.amdgcn.workitem.id.x() ; used as the element index for both GEPs below
803732b1fca8 LLVM 5.0 kono parents: diff changeset	343 %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	344 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	345 %a = load half, half addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	346 %add = fadd half %a, 1.0 ; source op is a scalar f16 add
803732b1fca8 LLVM 5.0 kono parents: diff changeset	347 %bc = bitcast half %add to i16
803732b1fca8 LLVM 5.0 kono parents: diff changeset	348 %zext = zext i16 %bc to i32 ; upper 16 bits become zero
803732b1fca8 LLVM 5.0 kono parents: diff changeset	349 %v2f16 = bitcast i32 %zext to <2 x half> ; view the widened value as packed halves
803732b1fca8 LLVM 5.0 kono parents: diff changeset	350 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %v2f16, <2 x half> zeroinitializer) ; lower bound 0.0 per element
803732b1fca8 LLVM 5.0 kono parents: diff changeset	351 %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>) ; upper bound 1.0 per element
803732b1fca8 LLVM 5.0 kono parents: diff changeset	352 store <2 x half> %clamp, <2 x half> addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	353 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	354 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	355
803732b1fca8 LLVM 5.0 kono parents: diff changeset	356 declare i32 @llvm.amdgcn.workitem.id.x() #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	357 declare float @llvm.fabs.f32(float) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	358 declare float @llvm.floor.f32(float) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	359 declare float @llvm.minnum.f32(float, float) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	360 declare float @llvm.maxnum.f32(float, float) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	361 declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	362 declare double @llvm.fabs.f64(double) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	363 declare double @llvm.minnum.f64(double, double) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	364 declare double @llvm.maxnum.f64(double, double) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	365 declare half @llvm.fabs.f16(half) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	366 declare half @llvm.minnum.f16(half, half) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	367 declare half @llvm.maxnum.f16(half, half) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	368 declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	369 declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	370 declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	371 declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	372
803732b1fca8 LLVM 5.0 kono parents: diff changeset	373
803732b1fca8 LLVM 5.0 kono parents: diff changeset	374 declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	375
803732b1fca8 LLVM 5.0 kono parents: diff changeset	376 attributes #0 = { nounwind } ; attached to the kernel definitions above
803732b1fca8 LLVM 5.0 kono parents: diff changeset	377 attributes #1 = { nounwind readnone } ; attached to the intrinsic declarations above
803732b1fca8 LLVM 5.0 kono parents: diff changeset	378 attributes #2 = { nounwind "target-features"="+fp32-denormals" } ; nounwind plus the fp32-denormals feature switched on
803732b1fca8 LLVM 5.0 kono parents: diff changeset	379 attributes #3 = { nounwind "target-features"="-fp64-fp16-denormals" } ; nounwind plus the fp64-fp16-denormals feature switched off
803732b1fca8 LLVM 5.0 kono parents: diff changeset	380
803732b1fca8 LLVM 5.0 kono parents: diff changeset	381 !llvm.dbg.cu = !{!0}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	382 !llvm.module.flags = !{!2, !3}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	383
803732b1fca8 LLVM 5.0 kono parents: diff changeset	384 !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	385 !1 = !DIFile(filename: "/tmp/foo.cl", directory: "/dev/null")
803732b1fca8 LLVM 5.0 kono parents: diff changeset	386 !2 = !{i32 2, !"Dwarf Version", i32 4}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	387 !3 = !{i32 2, !"Debug Info Version", i32 3}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	388 !4 = !DILocalVariable(name: "add", arg: 1, scope: !5, file: !1, line: 1)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	389 !5 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	390 !6 = !DISubroutineType(types: !7)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	391 !7 = !{null, !8}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	392 !8 = !DIBasicType(name: "float", size: 32, align: 32)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	393 !9 = !DIExpression()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	394 !10 = !DILocation(line: 1, column: 42, scope: !5)

Mercurial > hg > Members > tobaru > cbc > CbC_llvm

annotate test/CodeGen/AMDGPU/clamp-modifier.ll @ 128:c347d3398279 default tip