Members/tobaru/cbc/CbC_llvm: test/CodeGen/AMDGPU/clamp.ll annotate

annotate test/CodeGen/AMDGPU/clamp.ll @ 128:c347d3398279 default tip

fix

author	mir3636
date	Wed, 06 Dec 2017 14:37:17 +0900
parents	803732b1fca8
children

rev	line source
121 803732b1fca8 LLVM 5.0 kono parents: diff changeset	1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
803732b1fca8 LLVM 5.0 kono parents: diff changeset	2 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI %s
803732b1fca8 LLVM 5.0 kono parents: diff changeset	3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s
803732b1fca8 LLVM 5.0 kono parents: diff changeset	4
803732b1fca8 LLVM 5.0 kono parents: diff changeset	5 ; GCN-LABEL: {{^}}v_clamp_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	6 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	7 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	8 define amdgpu_kernel void @v_clamp_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	9 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	10 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	11 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	12 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	13 %max = call float @llvm.maxnum.f32(float %a, float 0.0) ; lower bound 0.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	14 %med = call float @llvm.minnum.f32(float %max, float 1.0) ; upper bound 1.0; the checks above expect a single v_max with the clamp modifier
803732b1fca8 LLVM 5.0 kono parents: diff changeset	15
803732b1fca8 LLVM 5.0 kono parents: diff changeset	16 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	17 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	18 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	19
803732b1fca8 LLVM 5.0 kono parents: diff changeset	20 ; GCN-LABEL: {{^}}v_clamp_neg_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	21 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	22 ; GCN: v_max_f32_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	23 define amdgpu_kernel void @v_clamp_neg_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	24 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	25 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	26 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	27 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	28 %fneg.a = fsub float -0.0, %a ; negation written as -0.0 - %a
803732b1fca8 LLVM 5.0 kono parents: diff changeset	29 %max = call float @llvm.maxnum.f32(float %fneg.a, float 0.0) ; lower bound 0.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	30 %med = call float @llvm.minnum.f32(float %max, float 1.0) ; upper bound 1.0; the checks above expect the negation as a source modifier on the clamped v_max
803732b1fca8 LLVM 5.0 kono parents: diff changeset	31
803732b1fca8 LLVM 5.0 kono parents: diff changeset	32 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	33 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	34 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	35
803732b1fca8 LLVM 5.0 kono parents: diff changeset	36 ; GCN-LABEL: {{^}}v_clamp_negabs_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	37 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	38 ; GCN: v_max_f32_e64 v{{[0-9]+}}, -|[[A]]|, -|[[A]]| clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	39 define amdgpu_kernel void @v_clamp_negabs_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	40 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	41 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	42 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	43 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	44 %fabs.a = call float @llvm.fabs.f32(float %a)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	45 %fneg.fabs.a = fsub float -0.0, %fabs.a ; negation of the absolute value, written as -0.0 - x
803732b1fca8 LLVM 5.0 kono parents: diff changeset	46
803732b1fca8 LLVM 5.0 kono parents: diff changeset	47 %max = call float @llvm.maxnum.f32(float %fneg.fabs.a, float 0.0) ; lower bound 0.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	48 %med = call float @llvm.minnum.f32(float %max, float 1.0) ; upper bound 1.0; the checks above expect neg and abs as source modifiers on the clamped v_max
803732b1fca8 LLVM 5.0 kono parents: diff changeset	49
803732b1fca8 LLVM 5.0 kono parents: diff changeset	50 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	51 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	52 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	53
803732b1fca8 LLVM 5.0 kono parents: diff changeset	54 ; GCN-LABEL: {{^}}v_clamp_negzero_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	55 ; GCN-DAG: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	56 ; GCN-DAG: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	57 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[SIGNBIT]], 1.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	58 define amdgpu_kernel void @v_clamp_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	59 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	60 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	61 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	62 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	63 %max = call float @llvm.maxnum.f32(float %a, float -0.0) ; lower bound is -0.0 here, not +0.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	64 %med = call float @llvm.minnum.f32(float %max, float 1.0) ; upper bound 1.0; the checks above expect v_med3_f32 with a materialized sign bit, not a clamp modifier
803732b1fca8 LLVM 5.0 kono parents: diff changeset	65
803732b1fca8 LLVM 5.0 kono parents: diff changeset	66 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	67 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	68 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	69
803732b1fca8 LLVM 5.0 kono parents: diff changeset	70 ; GCN-LABEL: {{^}}v_clamp_multi_use_max_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	71 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	72 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	73 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	74 define amdgpu_kernel void @v_clamp_multi_use_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	75 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	76 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	77 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	78 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	79 %max = call float @llvm.maxnum.f32(float %a, float 0.0) ; lower bound 0.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	80 %med = call float @llvm.minnum.f32(float %max, float 1.0) ; upper bound 1.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	81
803732b1fca8 LLVM 5.0 kono parents: diff changeset	82 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	83 store volatile float %max, float addrspace(1)* undef ; second use of %max; the checks above expect separate v_max and v_min instructions
803732b1fca8 LLVM 5.0 kono parents: diff changeset	84 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	85 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	86
803732b1fca8 LLVM 5.0 kono parents: diff changeset	87 ; GCN-LABEL: {{^}}v_clamp_f16:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	88 ; GCN: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	89 ; GFX89: v_max_f16_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	90
803732b1fca8 LLVM 5.0 kono parents: diff changeset	91 ; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], [[A]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	92 ; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	93 define amdgpu_kernel void @v_clamp_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	94 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	95 %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	96 %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	97 %a = load half, half addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	98 %max = call half @llvm.maxnum.f16(half %a, half 0.0) ; lower bound 0.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	99 %med = call half @llvm.minnum.f16(half %max, half 1.0) ; upper bound 1.0; per the checks above the clamp lands on v_max_f16 or, for the first RUN configuration, on the f16-to-f32 convert
803732b1fca8 LLVM 5.0 kono parents: diff changeset	100
803732b1fca8 LLVM 5.0 kono parents: diff changeset	101 store half %med, half addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	102 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	103 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	104
803732b1fca8 LLVM 5.0 kono parents: diff changeset	105 ; GCN-LABEL: {{^}}v_clamp_neg_f16:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	106 ; GCN: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	107 ; GFX89: v_max_f16_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	108
803732b1fca8 LLVM 5.0 kono parents: diff changeset	109 ; FIXME: Better to fold neg into max
803732b1fca8 LLVM 5.0 kono parents: diff changeset	110 ; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	111 ; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	112 define amdgpu_kernel void @v_clamp_neg_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	113 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	114 %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	115 %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	116 %a = load half, half addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	117 %fneg.a = fsub half -0.0, %a ; negation written as -0.0 - %a
803732b1fca8 LLVM 5.0 kono parents: diff changeset	118 %max = call half @llvm.maxnum.f16(half %fneg.a, half 0.0) ; lower bound 0.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	119 %med = call half @llvm.minnum.f16(half %max, half 1.0) ; upper bound 1.0; the checks above expect the negation as a source modifier on the clamped instruction
803732b1fca8 LLVM 5.0 kono parents: diff changeset	120
803732b1fca8 LLVM 5.0 kono parents: diff changeset	121 store half %med, half addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	122 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	123 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	124
803732b1fca8 LLVM 5.0 kono parents: diff changeset	125 ; GCN-LABEL: {{^}}v_clamp_negabs_f16:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	126 ; GCN: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	127 ; GFX89: v_max_f16_e64 v{{[0-9]+}}, -|[[A]]|, -|[[A]]| clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	128
803732b1fca8 LLVM 5.0 kono parents: diff changeset	129 ; FIXME: Better to fold neg/abs into max
803732b1fca8 LLVM 5.0 kono parents: diff changeset	130
803732b1fca8 LLVM 5.0 kono parents: diff changeset	131 ; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -|[[A]]| clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	132 ; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	133 define amdgpu_kernel void @v_clamp_negabs_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	134 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	135 %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	136 %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	137 %a = load half, half addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	138 %fabs.a = call half @llvm.fabs.f16(half %a)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	139 %fneg.fabs.a = fsub half -0.0, %fabs.a ; negation of the absolute value, written as -0.0 - x
803732b1fca8 LLVM 5.0 kono parents: diff changeset	140
803732b1fca8 LLVM 5.0 kono parents: diff changeset	141 %max = call half @llvm.maxnum.f16(half %fneg.fabs.a, half 0.0) ; lower bound 0.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	142 %med = call half @llvm.minnum.f16(half %max, half 1.0) ; upper bound 1.0; the checks above expect neg and abs as source modifiers on the clamped instruction
803732b1fca8 LLVM 5.0 kono parents: diff changeset	143
803732b1fca8 LLVM 5.0 kono parents: diff changeset	144 store half %med, half addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	145 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	146 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	147
803732b1fca8 LLVM 5.0 kono parents: diff changeset	148 ; FIXME: Do f64 instructions support clamp?
803732b1fca8 LLVM 5.0 kono parents: diff changeset	149 ; GCN-LABEL: {{^}}v_clamp_f64:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	150 ; GCN: {{buffer|flat|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	151 ; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, [[A]], [[A]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	152 define amdgpu_kernel void @v_clamp_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	153 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	154 %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	155 %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	156 %a = load double, double addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	157 %max = call double @llvm.maxnum.f64(double %a, double 0.0) ; lower bound 0.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	158 %med = call double @llvm.minnum.f64(double %max, double 1.0) ; upper bound 1.0; the checks above expect a single v_max_f64 with the clamp modifier on a register pair
803732b1fca8 LLVM 5.0 kono parents: diff changeset	159
803732b1fca8 LLVM 5.0 kono parents: diff changeset	160 store double %med, double addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	161 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	162 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	163
803732b1fca8 LLVM 5.0 kono parents: diff changeset	164 ; GCN-LABEL: {{^}}v_clamp_neg_f64:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	165 ; GCN: {{buffer|flat|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	166 ; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -[[A]], -[[A]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	167 define amdgpu_kernel void @v_clamp_neg_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	168 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	169 %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	170 %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	171 %a = load double, double addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	172 %fneg.a = fsub double -0.0, %a ; negation written as -0.0 - %a
803732b1fca8 LLVM 5.0 kono parents: diff changeset	173 %max = call double @llvm.maxnum.f64(double %fneg.a, double 0.0) ; lower bound 0.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	174 %med = call double @llvm.minnum.f64(double %max, double 1.0) ; upper bound 1.0; the checks above expect the negation as a source modifier on the clamped v_max_f64
803732b1fca8 LLVM 5.0 kono parents: diff changeset	175
803732b1fca8 LLVM 5.0 kono parents: diff changeset	176 store double %med, double addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	177 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	178 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	179
803732b1fca8 LLVM 5.0 kono parents: diff changeset	180 ; GCN-LABEL: {{^}}v_clamp_negabs_f64:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	181 ; GCN: {{buffer|flat|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	182 ; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -|[[A]]|, -|[[A]]| clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	183 define amdgpu_kernel void @v_clamp_negabs_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	184 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	185 %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	186 %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	187 %a = load double, double addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	188 %fabs.a = call double @llvm.fabs.f64(double %a)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	189 %fneg.fabs.a = fsub double -0.0, %fabs.a ; negation of the absolute value, written as -0.0 - x
803732b1fca8 LLVM 5.0 kono parents: diff changeset	190
803732b1fca8 LLVM 5.0 kono parents: diff changeset	191 %max = call double @llvm.maxnum.f64(double %fneg.fabs.a, double 0.0) ; lower bound 0.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	192 %med = call double @llvm.minnum.f64(double %max, double 1.0) ; upper bound 1.0; the checks above expect neg and abs as source modifiers on the clamped v_max_f64
803732b1fca8 LLVM 5.0 kono parents: diff changeset	193
803732b1fca8 LLVM 5.0 kono parents: diff changeset	194 store double %med, double addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	195 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	196 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	197
803732b1fca8 LLVM 5.0 kono parents: diff changeset	198 ; GCN-LABEL: {{^}}v_clamp_med3_aby_negzero_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	199 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	200 ; GCN: v_med3_f32
803732b1fca8 LLVM 5.0 kono parents: diff changeset	201 define amdgpu_kernel void @v_clamp_med3_aby_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	202 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	203 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	204 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	205 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	206 %med = call float @llvm.amdgcn.fmed3.f32(float -0.0, float 1.0, float %a) ; -0.0 rather than +0.0 as a bound; the checks above expect the v_med3_f32 to remain
803732b1fca8 LLVM 5.0 kono parents: diff changeset	207 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	208 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	209 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	210
803732b1fca8 LLVM 5.0 kono parents: diff changeset	211 ; GCN-LABEL: {{^}}v_clamp_med3_aby_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	212 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	213 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	214 define amdgpu_kernel void @v_clamp_med3_aby_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	215 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	216 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	217 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	218 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	219 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a) ; operand order (0.0, 1.0, %a); the checks above expect the clamp form
803732b1fca8 LLVM 5.0 kono parents: diff changeset	220 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	221 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	222 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	223
803732b1fca8 LLVM 5.0 kono parents: diff changeset	224 ; GCN-LABEL: {{^}}v_clamp_med3_bay_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	225 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	226 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	227 define amdgpu_kernel void @v_clamp_med3_bay_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	228 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	229 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	230 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	231 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	232 %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a) ; operand order (1.0, 0.0, %a); the checks above expect the clamp form
803732b1fca8 LLVM 5.0 kono parents: diff changeset	233 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	234 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	235 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	236
803732b1fca8 LLVM 5.0 kono parents: diff changeset	237 ; GCN-LABEL: {{^}}v_clamp_med3_yab_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	238 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	239 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	240 define amdgpu_kernel void @v_clamp_med3_yab_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	241 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	242 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	243 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	244 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	245 %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0) ; operand order (%a, 0.0, 1.0); the checks above expect the clamp form
803732b1fca8 LLVM 5.0 kono parents: diff changeset	246 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	247 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	248 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	249
803732b1fca8 LLVM 5.0 kono parents: diff changeset	250 ; GCN-LABEL: {{^}}v_clamp_med3_yba_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	251 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	252 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	253 define amdgpu_kernel void @v_clamp_med3_yba_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	254 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	255 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	256 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	257 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	258 %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0) ; operand order (%a, 1.0, 0.0); the checks above expect the clamp form
803732b1fca8 LLVM 5.0 kono parents: diff changeset	259 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	260 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	261 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	262
803732b1fca8 LLVM 5.0 kono parents: diff changeset	263 ; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	264 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	265 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	266 define amdgpu_kernel void @v_clamp_med3_ayb_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	267 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	268 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	269 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	270 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	271 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0) ; operand order (0.0, %a, 1.0); the checks above expect the clamp form
803732b1fca8 LLVM 5.0 kono parents: diff changeset	272 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	273 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	274 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	275
803732b1fca8 LLVM 5.0 kono parents: diff changeset	276 ; GCN-LABEL: {{^}}v_clamp_med3_bya_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	277 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	278 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	279 define amdgpu_kernel void @v_clamp_med3_bya_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	280 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	281 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	282 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	283 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	284 %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0) ; operand order (1.0, %a, 0.0); the checks above expect the clamp form
803732b1fca8 LLVM 5.0 kono parents: diff changeset	285 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	286 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	287 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	288
803732b1fca8 LLVM 5.0 kono parents: diff changeset	289 ; GCN-LABEL: {{^}}v_clamp_constants_to_one_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	290 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 1.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	291 define amdgpu_kernel void @v_clamp_constants_to_one_f32(float addrspace(1)* %out) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	292 %lane = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	293 %dst = getelementptr float, float addrspace(1)* %out, i32 %lane
803732b1fca8 LLVM 5.0 kono parents: diff changeset	294 %clamped = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 4.0) ; all-constant operands; the check above expects a plain move of 1.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	295 store float %clamped, float addrspace(1)* %dst
803732b1fca8 LLVM 5.0 kono parents: diff changeset	296 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	297 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	298
803732b1fca8 LLVM 5.0 kono parents: diff changeset	299 ; GCN-LABEL: {{^}}v_clamp_constants_to_zero_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	300 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	301 define amdgpu_kernel void @v_clamp_constants_to_zero_f32(float addrspace(1)* %out) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	302 %lane = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	303 %dst = getelementptr float, float addrspace(1)* %out, i32 %lane
803732b1fca8 LLVM 5.0 kono parents: diff changeset	304 %clamped = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float -4.0) ; all-constant operands; the check above expects a plain move of 0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	305 store float %clamped, float addrspace(1)* %dst
803732b1fca8 LLVM 5.0 kono parents: diff changeset	306 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	307 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	308
803732b1fca8 LLVM 5.0 kono parents: diff changeset	309 ; GCN-LABEL: {{^}}v_clamp_constant_preserve_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	310 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0.5
803732b1fca8 LLVM 5.0 kono parents: diff changeset	311 define amdgpu_kernel void @v_clamp_constant_preserve_f32(float addrspace(1)* %out) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	312 %lane = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	313 %dst = getelementptr float, float addrspace(1)* %out, i32 %lane
803732b1fca8 LLVM 5.0 kono parents: diff changeset	314 %clamped = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0.5) ; 0.5 lies between the two bounds; the check above expects it moved unchanged
803732b1fca8 LLVM 5.0 kono parents: diff changeset	315 store float %clamped, float addrspace(1)* %dst
803732b1fca8 LLVM 5.0 kono parents: diff changeset	316 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	317 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	318
803732b1fca8 LLVM 5.0 kono parents: diff changeset	319 ; GCN-LABEL: {{^}}v_clamp_constant_preserve_denorm_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	320 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fffff{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	321 define amdgpu_kernel void @v_clamp_constant_preserve_denorm_f32(float addrspace(1)* %out) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	322 %lane = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	323 %dst = getelementptr float, float addrspace(1)* %out, i32 %lane
803732b1fca8 LLVM 5.0 kono parents: diff changeset	324 %clamped = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 8388607 to float)) ; 8388607 is 0x7fffff, the same bit pattern the check above expects to be moved
803732b1fca8 LLVM 5.0 kono parents: diff changeset	325 store float %clamped, float addrspace(1)* %dst
803732b1fca8 LLVM 5.0 kono parents: diff changeset	326 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	327 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	328
803732b1fca8 LLVM 5.0 kono parents: diff changeset	329 ; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	330 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	331 define amdgpu_kernel void @v_clamp_constant_qnan_f32(float addrspace(1)* %out) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	332 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	333 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	334 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	335 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	336 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	337 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	338
803732b1fca8 LLVM 5.0 kono parents: diff changeset	339 ; GCN-LABEL: {{^}}v_clamp_constant_snan_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	340 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	341 define amdgpu_kernel void @v_clamp_constant_snan_f32(float addrspace(1)* %out) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	342 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	343 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	344 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
803732b1fca8 LLVM 5.0 kono parents: diff changeset	345 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	346 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	347 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	348
803732b1fca8 LLVM 5.0 kono parents: diff changeset	349 ; ---------------------------------------------------------------------
803732b1fca8 LLVM 5.0 kono parents: diff changeset	350 ; Test non-default behaviors enabling snans and disabling dx10_clamp
803732b1fca8 LLVM 5.0 kono parents: diff changeset	351 ; ---------------------------------------------------------------------
803732b1fca8 LLVM 5.0 kono parents: diff changeset	352
803732b1fca8 LLVM 5.0 kono parents: diff changeset	353 ; GCN-LABEL: {{^}}v_clamp_f32_no_dx10_clamp:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	354 ; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	355 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	356 define amdgpu_kernel void @v_clamp_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	357 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	358 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	359 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	360 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	361 %max = call float @llvm.maxnum.f32(float %a, float 0.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	362 %med = call float @llvm.minnum.f32(float %max, float 1.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	363
803732b1fca8 LLVM 5.0 kono parents: diff changeset	364 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	365 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	366 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	367
803732b1fca8 LLVM 5.0 kono parents: diff changeset	368 ; GCN-LABEL: {{^}}v_clamp_f32_snan_dx10clamp:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	369 ; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	370 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	371 define amdgpu_kernel void @v_clamp_f32_snan_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #3 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	372 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	373 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	374 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	375 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	376 %max = call float @llvm.maxnum.f32(float %a, float 0.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	377 %med = call float @llvm.minnum.f32(float %max, float 1.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	378
803732b1fca8 LLVM 5.0 kono parents: diff changeset	379 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	380 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	381 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	382
803732b1fca8 LLVM 5.0 kono parents: diff changeset	383 ; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	384 ; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	385 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	386 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	387 define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	388 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	389 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	390 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	391 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	392 %max = call float @llvm.maxnum.f32(float %a, float 0.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	393 %med = call float @llvm.minnum.f32(float %max, float 1.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	394
803732b1fca8 LLVM 5.0 kono parents: diff changeset	395 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	396 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	397 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	398
803732b1fca8 LLVM 5.0 kono parents: diff changeset	399 ; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp_nnan_src:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	400 ; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	401 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	402 define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp_nnan_src(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	403 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	404 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	405 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	406 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	407 %add = fadd nnan float %a, 1.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	408 %max = call float @llvm.maxnum.f32(float %add, float 0.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	409 %med = call float @llvm.minnum.f32(float %max, float 1.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	410
803732b1fca8 LLVM 5.0 kono parents: diff changeset	411 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	412 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	413 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	414
803732b1fca8 LLVM 5.0 kono parents: diff changeset	415 ; GCN-LABEL: {{^}}v_clamp_med3_aby_f32_no_dx10_clamp:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	416 ; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	417 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	418 define amdgpu_kernel void @v_clamp_med3_aby_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	419 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	420 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	421 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	422 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	423 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	424 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	425 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	426 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	427
803732b1fca8 LLVM 5.0 kono parents: diff changeset	428 ; GCN-LABEL: {{^}}v_clamp_med3_bay_f32_no_dx10_clamp:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	429 ; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	430 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	431 define amdgpu_kernel void @v_clamp_med3_bay_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	432 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	433 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	434 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	435 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	436 %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	437 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	438 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	439 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	440
803732b1fca8 LLVM 5.0 kono parents: diff changeset	441 ; GCN-LABEL: {{^}}v_clamp_med3_yab_f32_no_dx10_clamp:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	442 ; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	443 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	444 define amdgpu_kernel void @v_clamp_med3_yab_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	445 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	446 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	447 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	448 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	449 %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	450 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	451 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	452 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	453
803732b1fca8 LLVM 5.0 kono parents: diff changeset	454 ; GCN-LABEL: {{^}}v_clamp_med3_yba_f32_no_dx10_clamp:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	455 ; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	456 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 1.0, 0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	457 define amdgpu_kernel void @v_clamp_med3_yba_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	458 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	459 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	460 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	461 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	462 %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	463 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	464 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	465 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	466
803732b1fca8 LLVM 5.0 kono parents: diff changeset	467 ; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32_no_dx10_clamp:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	468 ; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	469 ; GCN: v_med3_f32 v{{[0-9]+}}, 0, [[A]], 1.0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	470 define amdgpu_kernel void @v_clamp_med3_ayb_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	471 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	472 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	473 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	474 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	475 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	476 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	477 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	478 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	479
803732b1fca8 LLVM 5.0 kono parents: diff changeset	480 ; GCN-LABEL: {{^}}v_clamp_med3_bya_f32_no_dx10_clamp:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	481 ; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	482 ; GCN: v_med3_f32 v{{[0-9]+}}, 1.0, [[A]], 0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	483 define amdgpu_kernel void @v_clamp_med3_bya_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	484 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	485 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	486 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	487 %a = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	488 %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	489 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	490 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	491 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	492
803732b1fca8 LLVM 5.0 kono parents: diff changeset	493 ; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32_no_dx10_clamp:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	494 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fc00000
803732b1fca8 LLVM 5.0 kono parents: diff changeset	495 define amdgpu_kernel void @v_clamp_constant_qnan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	496 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	497 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	498 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	499 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	500 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	501 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	502
803732b1fca8 LLVM 5.0 kono parents: diff changeset	503 ; GCN-LABEL: {{^}}v_clamp_constant_snan_f32_no_dx10_clamp:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	504 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7f800001
803732b1fca8 LLVM 5.0 kono parents: diff changeset	505 define amdgpu_kernel void @v_clamp_constant_snan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	506 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	507 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	508 %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
803732b1fca8 LLVM 5.0 kono parents: diff changeset	509 store float %med, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	510 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	511 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	512
803732b1fca8 LLVM 5.0 kono parents: diff changeset	513 ; GCN-LABEL: {{^}}v_clamp_v2f16:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	514 ; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	515 ; GFX9-NOT: [[A]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	516 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	517 define amdgpu_kernel void @v_clamp_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	518 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	519 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	520 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	521 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	522 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> zeroinitializer)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	523 %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	524
803732b1fca8 LLVM 5.0 kono parents: diff changeset	525 store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	526 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	527 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	528
803732b1fca8 LLVM 5.0 kono parents: diff changeset	529 ; GCN-LABEL: {{^}}v_clamp_v2f16_undef_elt:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	530 ; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	531 ; GFX9-NOT: [[A]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	532 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	533 define amdgpu_kernel void @v_clamp_v2f16_undef_elt(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	534 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	535 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	536 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	537 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	538 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half undef, half 0.0>)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	539 %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half undef>)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	540
803732b1fca8 LLVM 5.0 kono parents: diff changeset	541 store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	542 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	543 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	544
803732b1fca8 LLVM 5.0 kono parents: diff changeset	545 ; GCN-LABEL: {{^}}v_clamp_v2f16_not_zero:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	546 ; GFX9: v_pk_max_f16
803732b1fca8 LLVM 5.0 kono parents: diff changeset	547 ; GFX9: v_pk_min_f16
803732b1fca8 LLVM 5.0 kono parents: diff changeset	548 define amdgpu_kernel void @v_clamp_v2f16_not_zero(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	549 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	550 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	551 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	552 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	553 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 0.0>)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	554 %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	555
803732b1fca8 LLVM 5.0 kono parents: diff changeset	556 store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	557 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	558 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	559
803732b1fca8 LLVM 5.0 kono parents: diff changeset	560 ; GCN-LABEL: {{^}}v_clamp_v2f16_not_one:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	561 ; GFX9: v_pk_max_f16
803732b1fca8 LLVM 5.0 kono parents: diff changeset	562 ; GFX9: v_pk_min_f16
803732b1fca8 LLVM 5.0 kono parents: diff changeset	563 define amdgpu_kernel void @v_clamp_v2f16_not_one(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	564 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	565 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	566 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	567 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	568 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 0.0, half 0.0>)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	569 %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 0.0, half 1.0>)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	570
803732b1fca8 LLVM 5.0 kono parents: diff changeset	571 store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	572 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	573 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	574
803732b1fca8 LLVM 5.0 kono parents: diff changeset	575 ; GCN-LABEL: {{^}}v_clamp_neg_v2f16:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	576 ; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	577 ; GFX9-NOT: [[A]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	578 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] neg_lo:[1,1] neg_hi:[1,1] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	579 define amdgpu_kernel void @v_clamp_neg_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	580 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	581 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	582 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	583 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	584 %fneg.a = fsub <2 x half> <half -0.0, half -0.0>, %a
803732b1fca8 LLVM 5.0 kono parents: diff changeset	585 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %fneg.a, <2 x half> zeroinitializer)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	586 %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	587
803732b1fca8 LLVM 5.0 kono parents: diff changeset	588 store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	589 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	590 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	591
803732b1fca8 LLVM 5.0 kono parents: diff changeset	592 ; GCN-LABEL: {{^}}v_clamp_negabs_v2f16:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	593 ; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	594 ; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, [[A]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	595 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[ABS]], [[ABS]] neg_lo:[1,1] neg_hi:[1,1] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	596 define amdgpu_kernel void @v_clamp_negabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	597 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	598 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	599 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	600 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	601 %fabs.a = call <2 x half> @llvm.fabs.v2f16(<2 x half> %a)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	602 %fneg.fabs.a = fsub <2 x half> <half -0.0, half -0.0>, %fabs.a
803732b1fca8 LLVM 5.0 kono parents: diff changeset	603
803732b1fca8 LLVM 5.0 kono parents: diff changeset	604 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %fneg.fabs.a, <2 x half> zeroinitializer)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	605 %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	606
803732b1fca8 LLVM 5.0 kono parents: diff changeset	607 store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	608 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	609 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	610
803732b1fca8 LLVM 5.0 kono parents: diff changeset	611 ; GCN-LABEL: {{^}}v_clamp_neglo_v2f16:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	612 ; GCN: {{buffer\|flat\|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	613 ; GFX9-NOT: [[A]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	614 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] neg_lo:[1,1] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	615 define amdgpu_kernel void @v_clamp_neglo_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	616 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	617 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	618 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	619 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	620 %lo = extractelement <2 x half> %a, i32 0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	621 %neg.lo = fsub half -0.0, %lo
803732b1fca8 LLVM 5.0 kono parents: diff changeset	622 %neg.lo.vec = insertelement <2 x half> %a, half %neg.lo, i32 0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	623 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %neg.lo.vec, <2 x half> zeroinitializer)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	624 %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	625
803732b1fca8 LLVM 5.0 kono parents: diff changeset	626 store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	627 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	628 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	629
; "Negated high half" clamp case. The kernel loads one <2 x half>, negates only
; element 1, and clamps the vector with maxnum(x, 0.0) then minnum(x, 1.0).
; All targets must show the dword load; the gfx900 run additionally expects the
; loaded register to be untouched until a single packed max that carries the
; neg_hi modifier and the clamp bit.
; The load pattern uses plain '|' alternation: an escaped pipe inside a
; FileCheck regex would match a literal '|' character and never fire.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	630 ; GCN-LABEL: {{^}}v_clamp_neghi_v2f16:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	631 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	632 ; GFX9-NOT: [[A]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	633 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] neg_hi:[1,1] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	634 define amdgpu_kernel void @v_clamp_neghi_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	635 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	636 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	637 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	638 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
; Negate element 1 only (fsub from -0.0) and re-insert it, leaving element 0
; as loaded.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	639 %hi = extractelement <2 x half> %a, i32 1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	640 %neg.hi = fsub half -0.0, %hi
803732b1fca8 LLVM 5.0 kono parents: diff changeset	641 %neg.hi.vec = insertelement <2 x half> %a, half %neg.hi, i32 1
; The max-with-0.0 / min-with-1.0 pair is the clamp pattern under test.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	642 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %neg.hi.vec, <2 x half> zeroinitializer)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	643 %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	644
803732b1fca8 LLVM 5.0 kono parents: diff changeset	645 store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	646 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	647 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	648
; Clamp of a <2 x half> whose two elements were swapped by a shufflevector.
; All targets must show the dword load; the gfx900 run additionally expects the
; loaded register to be untouched until a single packed max that carries
; op_sel:[1,1] op_sel_hi:[0,0] and the clamp bit.
; The load pattern uses plain '|' alternation: an escaped pipe inside a
; FileCheck regex would match a literal '|' character and never fire.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	649 ; GCN-LABEL: {{^}}v_clamp_v2f16_shuffle:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	650 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	651 ; GFX9-NOT: [[A]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	652 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] op_sel:[1,1] op_sel_hi:[0,0] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	653 define amdgpu_kernel void @v_clamp_v2f16_shuffle(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
803732b1fca8 LLVM 5.0 kono parents: diff changeset	654 %tid = call i32 @llvm.amdgcn.workitem.id.x()
803732b1fca8 LLVM 5.0 kono parents: diff changeset	655 %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	656 %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
803732b1fca8 LLVM 5.0 kono parents: diff changeset	657 %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
; Mask <1, 0> swaps the two halves of the loaded vector.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	658 %shuf = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 0>
; The max-with-0.0 / min-with-1.0 pair is the clamp pattern under test.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	659 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %shuf, <2 x half> zeroinitializer)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	660 %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	661
803732b1fca8 LLVM 5.0 kono parents: diff changeset	662 store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	663 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	664 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	665
; Clamp applied to the maxnum of two different values. The checks expect two
; f32 adds producing A and B, followed by one v_max_f32_e64 that takes A and B
; as its sources and carries the clamp bit.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	666 ; GCN-LABEL: {{^}}v_clamp_diff_source_f32:
803732b1fca8 LLVM 5.0 kono parents: diff changeset	667 ; GCN: v_add_f32_e32 [[A:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	668 ; GCN: v_add_f32_e32 [[B:v[0-9]+]]
803732b1fca8 LLVM 5.0 kono parents: diff changeset	669 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[B]] clamp{{$}}
803732b1fca8 LLVM 5.0 kono parents: diff changeset	670 define amdgpu_kernel void @v_clamp_diff_source_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	671 {
; Load three consecutive floats from %aptr (indices 0, 1 and 2).
803732b1fca8 LLVM 5.0 kono parents: diff changeset	672 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	673 %gep1 = getelementptr float, float addrspace(1)* %aptr, i32 1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	674 %gep2 = getelementptr float, float addrspace(1)* %aptr, i32 2
803732b1fca8 LLVM 5.0 kono parents: diff changeset	675 %l0 = load float, float addrspace(1)* %gep0
803732b1fca8 LLVM 5.0 kono parents: diff changeset	676 %l1 = load float, float addrspace(1)* %gep1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	677 %l2 = load float, float addrspace(1)* %gep2
; Two distinct sums sharing %l0; these are the two different max operands.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	678 %a = fadd nsz float %l0, %l1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	679 %b = fadd nsz float %l0, %l2
; maxnum of the two sums, then the max-with-0.0 / min-with-1.0 clamp pattern.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	680 %res = call nsz float @llvm.maxnum.f32(float %a, float %b)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	681 %max = call nsz float @llvm.maxnum.f32(float %res, float 0.0)
803732b1fca8 LLVM 5.0 kono parents: diff changeset	682 %min = call nsz float @llvm.minnum.f32(float %max, float 1.0)
; The result is written to %out at index 3.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	683 %out.gep = getelementptr float, float addrspace(1)* %out, i32 3
803732b1fca8 LLVM 5.0 kono parents: diff changeset	684 store float %min, float addrspace(1)* %out.gep
803732b1fca8 LLVM 5.0 kono parents: diff changeset	685 ret void
803732b1fca8 LLVM 5.0 kono parents: diff changeset	686 }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	687
; Intrinsic declarations, all tagged with attribute group #1.
; The kernels at the end of this file call workitem.id.x and the f32 / v2f16
; minnum and maxnum intrinsics; the fabs, fmed3, f64 and scalar f16 entries are
; presumably used by tests earlier in the file.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	688 declare i32 @llvm.amdgcn.workitem.id.x() #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	689 declare float @llvm.fabs.f32(float) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	690 declare float @llvm.minnum.f32(float, float) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	691 declare float @llvm.maxnum.f32(float, float) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	692 declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	693 declare double @llvm.fabs.f64(double) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	694 declare double @llvm.minnum.f64(double, double) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	695 declare double @llvm.maxnum.f64(double, double) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	696 declare half @llvm.fabs.f16(half) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	697 declare half @llvm.minnum.f16(half, half) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	698 declare half @llvm.maxnum.f16(half, half) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	699 declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	700 declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	701 declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1
803732b1fca8 LLVM 5.0 kono parents: diff changeset	702
; Attribute groups.
; #0 is attached to the kernel definitions and #1 to the intrinsic declarations.
; #2, #3 and #4 each combine nounwind with a different on/off pairing of the
; dx10-clamp and fp-exceptions target features and set "no-nans-fp-math" to
; "false"; nothing at the end of this file references them, so they are
; presumably used by tests earlier in the file.
803732b1fca8 LLVM 5.0 kono parents: diff changeset	703 attributes #0 = { nounwind }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	704 attributes #1 = { nounwind readnone }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	705 attributes #2 = { nounwind "target-features"="-dx10-clamp,-fp-exceptions" "no-nans-fp-math"="false" }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	706 attributes #3 = { nounwind "target-features"="+dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" }
803732b1fca8 LLVM 5.0 kono parents: diff changeset	707 attributes #4 = { nounwind "target-features"="-dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" }

Mercurial > hg > Members > tobaru > cbc > CbC_llvm

annotate test/CodeGen/AMDGPU/clamp.ll @ 128:c347d3398279 default tip