Mercurial > hg > CbC > CbC_llvm
view llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll @ 266:00f31e85ec16 default tip
Added tag current for changeset 31d058e83c98
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Sat, 14 Oct 2023 10:13:55 +0900 |
parents | 1f2b6ac9f198 |
children |
line wrap: on
line source
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH %s

; f32 fdiv lowering on gfx900, checked once per -denormal-fp-math-f32 mode
; (ieee and preserve-sign). Every kernel loads its denominator from %arg,
; divides, and stores the quotient back to %arg. Three groups follow:
;   *_25ulp              - fdiv tagged with !fpmath !0 (2.5 ULP, see !0 below)
;   *_fast               - fdiv carrying the `fast` flag
;   *_correctly_rounded  - fdiv with neither !fpmath nor fast-math flags
; The assertion comments inside each function are autogenerated; regenerate
; them with the script named on the first line instead of editing by hand.

; 1.0 / x, scalar, 2.5 ULP allowed.
define amdgpu_kernel void @div_1_by_x_25ulp(ptr addrspace(1) %arg) {
; GCN-DENORM-LABEL: div_1_by_x_25ulp:
; GCN-DENORM:       ; %bb.0:
; GCN-DENORM-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-DENORM-NEXT:    v_mov_b32_e32 v1, 0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    s_load_dword s2, s[0:1], 0x0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e32 v0, s2
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v0, v0
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v2, s2
; GCN-DENORM-NEXT:    v_sub_u32_e32 v2, 0, v2
; GCN-DENORM-NEXT:    v_ldexp_f32 v0, v0, v2
; GCN-DENORM-NEXT:    global_store_dword v1, v0, s[0:1]
; GCN-DENORM-NEXT:    s_endpgm
;
; GCN-FLUSH-LABEL: div_1_by_x_25ulp:
; GCN-FLUSH:       ; %bb.0:
; GCN-FLUSH-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v1, 0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    s_load_dword s2, s[0:1], 0x0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v0, s2
; GCN-FLUSH-NEXT:    global_store_dword v1, v0, s[0:1]
; GCN-FLUSH-NEXT:    s_endpgm
  %load = load float, ptr addrspace(1) %arg, align 4
  %div = fdiv float 1.000000e+00, %load, !fpmath !0
  store float %div, ptr addrspace(1) %arg, align 4
  ret void
}

; -1.0 / x, scalar, 2.5 ULP allowed.
define amdgpu_kernel void @div_minus_1_by_x_25ulp(ptr addrspace(1) %arg) {
; GCN-DENORM-LABEL: div_minus_1_by_x_25ulp:
; GCN-DENORM:       ; %bb.0:
; GCN-DENORM-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-DENORM-NEXT:    v_mov_b32_e32 v1, 0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    s_load_dword s2, s[0:1], 0x0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e64 v0, -s2
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v0, v0
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v2, s2
; GCN-DENORM-NEXT:    v_sub_u32_e32 v2, 0, v2
; GCN-DENORM-NEXT:    v_ldexp_f32 v0, v0, v2
; GCN-DENORM-NEXT:    global_store_dword v1, v0, s[0:1]
; GCN-DENORM-NEXT:    s_endpgm
;
; GCN-FLUSH-LABEL: div_minus_1_by_x_25ulp:
; GCN-FLUSH:       ; %bb.0:
; GCN-FLUSH-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v1, 0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    s_load_dword s2, s[0:1], 0x0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    v_rcp_f32_e64 v0, -s2
; GCN-FLUSH-NEXT:    global_store_dword v1, v0, s[0:1]
; GCN-FLUSH-NEXT:    s_endpgm
  %load = load float, ptr addrspace(1) %arg, align 4
  %div = fdiv float -1.000000e+00, %load, !fpmath !0
  store float %div, ptr addrspace(1) %arg, align 4
  ret void
}

; 1.0 / (fneg x), scalar, 2.5 ULP allowed.
define amdgpu_kernel void @div_1_by_minus_x_25ulp(ptr addrspace(1) %arg) {
; GCN-DENORM-LABEL: div_1_by_minus_x_25ulp:
; GCN-DENORM:       ; %bb.0:
; GCN-DENORM-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-DENORM-NEXT:    v_mov_b32_e32 v1, 0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    s_load_dword s2, s[0:1], 0x0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e64 v0, -s2
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v0, v0
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v2, s2
; GCN-DENORM-NEXT:    v_sub_u32_e32 v2, 0, v2
; GCN-DENORM-NEXT:    v_ldexp_f32 v0, v0, v2
; GCN-DENORM-NEXT:    global_store_dword v1, v0, s[0:1]
; GCN-DENORM-NEXT:    s_endpgm
;
; GCN-FLUSH-LABEL: div_1_by_minus_x_25ulp:
; GCN-FLUSH:       ; %bb.0:
; GCN-FLUSH-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v1, 0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    s_load_dword s2, s[0:1], 0x0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    v_rcp_f32_e64 v0, -s2
; GCN-FLUSH-NEXT:    global_store_dword v1, v0, s[0:1]
; GCN-FLUSH-NEXT:    s_endpgm
  %load = load float, ptr addrspace(1) %arg, align 4
  %neg = fneg float %load
  %div = fdiv float 1.000000e+00, %neg, !fpmath !0
  store float %div, ptr addrspace(1) %arg, align 4
  ret void
}

; -1.0 / (-0.0 - x), scalar, 2.5 ULP allowed.
; NOTE(review): negation is spelled `fsub -0.0, x` here while the neighbouring
; tests use `fneg`; presumably intentional coverage of both forms - confirm.
define amdgpu_kernel void @div_minus_1_by_minus_x_25ulp(ptr addrspace(1) %arg) {
; GCN-DENORM-LABEL: div_minus_1_by_minus_x_25ulp:
; GCN-DENORM:       ; %bb.0:
; GCN-DENORM-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-DENORM-NEXT:    v_mov_b32_e32 v1, 0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    s_load_dword s2, s[0:1], 0x0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e32 v0, s2
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v0, v0
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v2, s2
; GCN-DENORM-NEXT:    v_sub_u32_e32 v2, 0, v2
; GCN-DENORM-NEXT:    v_ldexp_f32 v0, v0, v2
; GCN-DENORM-NEXT:    global_store_dword v1, v0, s[0:1]
; GCN-DENORM-NEXT:    s_endpgm
;
; GCN-FLUSH-LABEL: div_minus_1_by_minus_x_25ulp:
; GCN-FLUSH:       ; %bb.0:
; GCN-FLUSH-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v1, 0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    s_load_dword s2, s[0:1], 0x0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v0, s2
; GCN-FLUSH-NEXT:    global_store_dword v1, v0, s[0:1]
; GCN-FLUSH-NEXT:    s_endpgm
  %load = load float, ptr addrspace(1) %arg, align 4
  %neg = fsub float -0.000000e+00, %load
  %div = fdiv float -1.000000e+00, %neg, !fpmath !0
  store float %div, ptr addrspace(1) %arg, align 4
  ret void
}

; <1.0 x 4> / x, <4 x float>, 2.5 ULP allowed.
define amdgpu_kernel void @div_v4_1_by_x_25ulp(ptr addrspace(1) %arg) {
; GCN-DENORM-LABEL: div_v4_1_by_x_25ulp:
; GCN-DENORM:       ; %bb.0:
; GCN-DENORM-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; GCN-DENORM-NEXT:    v_mov_b32_e32 v4, 0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e32 v0, s0
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e32 v2, s1
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v0, v0
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v2, v2
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v1, s0
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v3, s1
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e32 v5, s2
; GCN-DENORM-NEXT:    v_sub_u32_e32 v1, 0, v1
; GCN-DENORM-NEXT:    v_sub_u32_e32 v3, 0, v3
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v5, v5
; GCN-DENORM-NEXT:    v_ldexp_f32 v0, v0, v1
; GCN-DENORM-NEXT:    v_ldexp_f32 v1, v2, v3
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e32 v3, s3
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v6, s2
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v3, v3
; GCN-DENORM-NEXT:    v_sub_u32_e32 v2, 0, v6
; GCN-DENORM-NEXT:    v_ldexp_f32 v2, v5, v2
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v5, s3
; GCN-DENORM-NEXT:    v_sub_u32_e32 v5, 0, v5
; GCN-DENORM-NEXT:    v_ldexp_f32 v3, v3, v5
; GCN-DENORM-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5]
; GCN-DENORM-NEXT:    s_endpgm
;
; GCN-FLUSH-LABEL: div_v4_1_by_x_25ulp:
; GCN-FLUSH:       ; %bb.0:
; GCN-FLUSH-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v4, 0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v0, s0
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v1, s1
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v2, s2
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v3, s3
; GCN-FLUSH-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5]
; GCN-FLUSH-NEXT:    s_endpgm
  %load = load <4 x float>, ptr addrspace(1) %arg, align 16
  %div = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %load, !fpmath !0
  store <4 x float> %div, ptr addrspace(1) %arg, align 16
  ret void
}

; <-1.0 x 4> / x, <4 x float>, 2.5 ULP allowed.
define amdgpu_kernel void @div_v4_minus_1_by_x_25ulp(ptr addrspace(1) %arg) {
; GCN-DENORM-LABEL: div_v4_minus_1_by_x_25ulp:
; GCN-DENORM:       ; %bb.0:
; GCN-DENORM-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; GCN-DENORM-NEXT:    v_mov_b32_e32 v4, 0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e64 v0, -s0
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e64 v2, -s1
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v0, v0
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v2, v2
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v1, s0
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v3, s1
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e64 v5, -s2
; GCN-DENORM-NEXT:    v_sub_u32_e32 v1, 0, v1
; GCN-DENORM-NEXT:    v_sub_u32_e32 v3, 0, v3
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v5, v5
; GCN-DENORM-NEXT:    v_ldexp_f32 v0, v0, v1
; GCN-DENORM-NEXT:    v_ldexp_f32 v1, v2, v3
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e64 v3, -s3
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v6, s2
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v3, v3
; GCN-DENORM-NEXT:    v_sub_u32_e32 v2, 0, v6
; GCN-DENORM-NEXT:    v_ldexp_f32 v2, v5, v2
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v5, s3
; GCN-DENORM-NEXT:    v_sub_u32_e32 v5, 0, v5
; GCN-DENORM-NEXT:    v_ldexp_f32 v3, v3, v5
; GCN-DENORM-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5]
; GCN-DENORM-NEXT:    s_endpgm
;
; GCN-FLUSH-LABEL: div_v4_minus_1_by_x_25ulp:
; GCN-FLUSH:       ; %bb.0:
; GCN-FLUSH-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v4, 0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    v_rcp_f32_e64 v0, -s0
; GCN-FLUSH-NEXT:    v_rcp_f32_e64 v1, -s1
; GCN-FLUSH-NEXT:    v_rcp_f32_e64 v2, -s2
; GCN-FLUSH-NEXT:    v_rcp_f32_e64 v3, -s3
; GCN-FLUSH-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5]
; GCN-FLUSH-NEXT:    s_endpgm
  %load = load <4 x float>, ptr addrspace(1) %arg, align 16
  %div = fdiv <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, %load, !fpmath !0
  store <4 x float> %div, ptr addrspace(1) %arg, align 16
  ret void
}

; <1.0 x 4> / (fneg x), <4 x float>, 2.5 ULP allowed.
define amdgpu_kernel void @div_v4_1_by_minus_x_25ulp(ptr addrspace(1) %arg) {
; GCN-DENORM-LABEL: div_v4_1_by_minus_x_25ulp:
; GCN-DENORM:       ; %bb.0:
; GCN-DENORM-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; GCN-DENORM-NEXT:    v_mov_b32_e32 v4, 0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e64 v0, -s0
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e64 v2, -s1
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v0, v0
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v2, v2
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v1, s0
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v3, s1
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e64 v5, -s2
; GCN-DENORM-NEXT:    v_sub_u32_e32 v1, 0, v1
; GCN-DENORM-NEXT:    v_sub_u32_e32 v3, 0, v3
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v5, v5
; GCN-DENORM-NEXT:    v_ldexp_f32 v0, v0, v1
; GCN-DENORM-NEXT:    v_ldexp_f32 v1, v2, v3
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e64 v3, -s3
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v6, s2
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v3, v3
; GCN-DENORM-NEXT:    v_sub_u32_e32 v2, 0, v6
; GCN-DENORM-NEXT:    v_ldexp_f32 v2, v5, v2
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v5, s3
; GCN-DENORM-NEXT:    v_sub_u32_e32 v5, 0, v5
; GCN-DENORM-NEXT:    v_ldexp_f32 v3, v3, v5
; GCN-DENORM-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5]
; GCN-DENORM-NEXT:    s_endpgm
;
; GCN-FLUSH-LABEL: div_v4_1_by_minus_x_25ulp:
; GCN-FLUSH:       ; %bb.0:
; GCN-FLUSH-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v4, 0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    v_rcp_f32_e64 v0, -s0
; GCN-FLUSH-NEXT:    v_rcp_f32_e64 v1, -s1
; GCN-FLUSH-NEXT:    v_rcp_f32_e64 v2, -s2
; GCN-FLUSH-NEXT:    v_rcp_f32_e64 v3, -s3
; GCN-FLUSH-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5]
; GCN-FLUSH-NEXT:    s_endpgm
  %load = load <4 x float>, ptr addrspace(1) %arg, align 16
  %neg = fneg <4 x float> %load
  %div = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %neg, !fpmath !0
  store <4 x float> %div, ptr addrspace(1) %arg, align 16
  ret void
}

; <-1.0 x 4> / (fneg x), <4 x float>, 2.5 ULP allowed.
define amdgpu_kernel void @div_v4_minus_1_by_minus_x_25ulp(ptr addrspace(1) %arg) {
; GCN-DENORM-LABEL: div_v4_minus_1_by_minus_x_25ulp:
; GCN-DENORM:       ; %bb.0:
; GCN-DENORM-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; GCN-DENORM-NEXT:    v_mov_b32_e32 v4, 0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e32 v0, s0
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e32 v2, s1
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v0, v0
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v2, v2
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v1, s0
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v3, s1
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e32 v5, s2
; GCN-DENORM-NEXT:    v_sub_u32_e32 v1, 0, v1
; GCN-DENORM-NEXT:    v_sub_u32_e32 v3, 0, v3
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v5, v5
; GCN-DENORM-NEXT:    v_ldexp_f32 v0, v0, v1
; GCN-DENORM-NEXT:    v_ldexp_f32 v1, v2, v3
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e32 v3, s3
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v6, s2
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v3, v3
; GCN-DENORM-NEXT:    v_sub_u32_e32 v2, 0, v6
; GCN-DENORM-NEXT:    v_ldexp_f32 v2, v5, v2
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v5, s3
; GCN-DENORM-NEXT:    v_sub_u32_e32 v5, 0, v5
; GCN-DENORM-NEXT:    v_ldexp_f32 v3, v3, v5
; GCN-DENORM-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5]
; GCN-DENORM-NEXT:    s_endpgm
;
; GCN-FLUSH-LABEL: div_v4_minus_1_by_minus_x_25ulp:
; GCN-FLUSH:       ; %bb.0:
; GCN-FLUSH-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v4, 0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v0, s0
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v1, s1
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v2, s2
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v3, s3
; GCN-FLUSH-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5]
; GCN-FLUSH-NEXT:    s_endpgm
  %load = load <4 x float>, ptr addrspace(1) %arg, align 16
  %neg = fneg <4 x float> %load
  %div = fdiv <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, %neg, !fpmath !0
  store <4 x float> %div, ptr addrspace(1) %arg, align 16
  ret void
}

; <2.0, 1.0, -1.0, -2.0> / x, <4 x float>, 2.5 ULP allowed: mixes +/-1.0
; numerators with other constants in one vector.
define amdgpu_kernel void @div_v4_c_by_x_25ulp(ptr addrspace(1) %arg) {
; GCN-DENORM-LABEL: div_v4_c_by_x_25ulp:
; GCN-DENORM:       ; %bb.0:
; GCN-DENORM-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; GCN-DENORM-NEXT:    v_mov_b32_e32 v4, 0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e32 v1, s0
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e32 v2, s1
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v1, v1
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v2, v2
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v3, s1
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e64 v5, -s2
; GCN-DENORM-NEXT:    v_sub_u32_e32 v3, 0, v3
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v5, v5
; GCN-DENORM-NEXT:    v_mul_f32_e32 v7, 0.5, v1
; GCN-DENORM-NEXT:    v_ldexp_f32 v1, v2, v3
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e32 v2, s3
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v3, v2
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v6, s2
; GCN-DENORM-NEXT:    v_sub_u32_e32 v2, 0, v6
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v0, s0
; GCN-DENORM-NEXT:    v_ldexp_f32 v2, v5, v2
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v5, s3
; GCN-DENORM-NEXT:    v_sub_u32_e32 v0, 2, v0
; GCN-DENORM-NEXT:    v_mul_f32_e32 v3, -0.5, v3
; GCN-DENORM-NEXT:    v_sub_u32_e32 v5, 2, v5
; GCN-DENORM-NEXT:    v_ldexp_f32 v0, v7, v0
; GCN-DENORM-NEXT:    v_ldexp_f32 v3, v3, v5
; GCN-DENORM-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5]
; GCN-DENORM-NEXT:    s_endpgm
;
; GCN-FLUSH-LABEL: div_v4_c_by_x_25ulp:
; GCN-FLUSH:       ; %bb.0:
; GCN-FLUSH-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v0, 0x6f800000
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v1, 0x2f800000
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v4, 0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    v_cmp_gt_f32_e64 vcc, |s0|, v0
; GCN-FLUSH-NEXT:    v_cndmask_b32_e32 v3, 1.0, v1, vcc
; GCN-FLUSH-NEXT:    v_cmp_gt_f32_e64 vcc, |s3|, v0
; GCN-FLUSH-NEXT:    v_cndmask_b32_e32 v5, 1.0, v1, vcc
; GCN-FLUSH-NEXT:    v_mul_f32_e32 v0, s0, v3
; GCN-FLUSH-NEXT:    v_mul_f32_e32 v1, s3, v5
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v0, v0
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v6, v1
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v1, s1
; GCN-FLUSH-NEXT:    v_rcp_f32_e64 v2, -s2
; GCN-FLUSH-NEXT:    v_add_f32_e32 v0, v0, v0
; GCN-FLUSH-NEXT:    v_mul_f32_e32 v6, -2.0, v6
; GCN-FLUSH-NEXT:    v_mul_f32_e32 v0, v3, v0
; GCN-FLUSH-NEXT:    v_mul_f32_e32 v3, v5, v6
; GCN-FLUSH-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5]
; GCN-FLUSH-NEXT:    s_endpgm
  %load = load <4 x float>, ptr addrspace(1) %arg, align 16
  %div = fdiv <4 x float> <float 2.000000e+00, float 1.000000e+00, float -1.000000e+00, float -2.000000e+00>, %load, !fpmath !0
  store <4 x float> %div, ptr addrspace(1) %arg, align 16
  ret void
}

; <2.0, 1.0, -1.0, -2.0> / (fneg x), <4 x float>, 2.5 ULP allowed.
define amdgpu_kernel void @div_v4_c_by_minus_x_25ulp(ptr addrspace(1) %arg) {
; GCN-DENORM-LABEL: div_v4_c_by_minus_x_25ulp:
; GCN-DENORM:       ; %bb.0:
; GCN-DENORM-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; GCN-DENORM-NEXT:    v_mov_b32_e32 v4, 0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e64 v1, -s0
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e64 v2, -s1
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v1, v1
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v2, v2
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v3, s1
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e32 v5, s2
; GCN-DENORM-NEXT:    v_sub_u32_e32 v3, 0, v3
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v5, v5
; GCN-DENORM-NEXT:    v_mul_f32_e32 v7, 0.5, v1
; GCN-DENORM-NEXT:    v_ldexp_f32 v1, v2, v3
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e64 v2, -s3
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v3, v2
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v6, s2
; GCN-DENORM-NEXT:    v_sub_u32_e32 v2, 0, v6
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v0, s0
; GCN-DENORM-NEXT:    v_ldexp_f32 v2, v5, v2
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v5, s3
; GCN-DENORM-NEXT:    v_sub_u32_e32 v0, 2, v0
; GCN-DENORM-NEXT:    v_mul_f32_e32 v3, -0.5, v3
; GCN-DENORM-NEXT:    v_sub_u32_e32 v5, 2, v5
; GCN-DENORM-NEXT:    v_ldexp_f32 v0, v7, v0
; GCN-DENORM-NEXT:    v_ldexp_f32 v3, v3, v5
; GCN-DENORM-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5]
; GCN-DENORM-NEXT:    s_endpgm
;
; GCN-FLUSH-LABEL: div_v4_c_by_minus_x_25ulp:
; GCN-FLUSH:       ; %bb.0:
; GCN-FLUSH-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v0, 0x6f800000
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v2, 0x2f800000
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v4, 0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    v_cmp_gt_f32_e64 vcc, |s0|, v0
; GCN-FLUSH-NEXT:    v_cndmask_b32_e32 v3, 1.0, v2, vcc
; GCN-FLUSH-NEXT:    v_cmp_gt_f32_e64 vcc, |s3|, v0
; GCN-FLUSH-NEXT:    v_mul_f32_e32 v5, s0, v3
; GCN-FLUSH-NEXT:    v_mul_f32_e64 v6, -s0, v3
; GCN-FLUSH-NEXT:    v_cndmask_b32_e32 v7, 1.0, v2, vcc
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v5, v5
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v6, v6
; GCN-FLUSH-NEXT:    v_mul_f32_e32 v0, s3, v7
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v8, v0
; GCN-FLUSH-NEXT:    v_rcp_f32_e64 v1, -s1
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v2, s2
; GCN-FLUSH-NEXT:    v_sub_f32_e32 v0, v6, v5
; GCN-FLUSH-NEXT:    v_mul_f32_e32 v0, v3, v0
; GCN-FLUSH-NEXT:    v_add_f32_e32 v3, v8, v8
; GCN-FLUSH-NEXT:    v_mul_f32_e32 v3, v7, v3
; GCN-FLUSH-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5]
; GCN-FLUSH-NEXT:    s_endpgm
  %load = load <4 x float>, ptr addrspace(1) %arg, align 16
  %neg = fneg <4 x float> %load
  %div = fdiv <4 x float> <float 2.000000e+00, float 1.000000e+00, float -1.000000e+00, float -2.000000e+00>, %neg, !fpmath !0
  store <4 x float> %div, ptr addrspace(1) %arg, align 16
  ret void
}

; %num / x with a non-constant numerator passed as a kernel argument,
; 2.5 ULP allowed.
define amdgpu_kernel void @div_v_by_x_25ulp(ptr addrspace(1) %arg, float %num) {
; GCN-DENORM-LABEL: div_v_by_x_25ulp:
; GCN-DENORM:       ; %bb.0:
; GCN-DENORM-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-DENORM-NEXT:    s_load_dword s4, s[0:1], 0x2c
; GCN-DENORM-NEXT:    v_mov_b32_e32 v0, 0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    s_load_dword s0, s[2:3], 0x0
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v2, s4
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e32 v3, s4
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    v_frexp_mant_f32_e32 v1, s0
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v1, v1
; GCN-DENORM-NEXT:    v_frexp_exp_i32_f32_e32 v4, s0
; GCN-DENORM-NEXT:    v_sub_u32_e32 v2, v2, v4
; GCN-DENORM-NEXT:    v_mul_f32_e32 v1, v3, v1
; GCN-DENORM-NEXT:    v_ldexp_f32 v1, v1, v2
; GCN-DENORM-NEXT:    global_store_dword v0, v1, s[2:3]
; GCN-DENORM-NEXT:    s_endpgm
;
; GCN-FLUSH-LABEL: div_v_by_x_25ulp:
; GCN-FLUSH:       ; %bb.0:
; GCN-FLUSH-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-FLUSH-NEXT:    s_load_dword s4, s[0:1], 0x2c
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v0, 0x6f800000
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v1, 0x2f800000
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v2, 0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    s_load_dword s0, s[2:3], 0x0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    v_cmp_gt_f32_e64 vcc, |s0|, v0
; GCN-FLUSH-NEXT:    v_cndmask_b32_e32 v0, 1.0, v1, vcc
; GCN-FLUSH-NEXT:    v_mul_f32_e32 v1, s0, v0
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v1, v1
; GCN-FLUSH-NEXT:    v_mul_f32_e32 v1, s4, v1
; GCN-FLUSH-NEXT:    v_mul_f32_e32 v0, v0, v1
; GCN-FLUSH-NEXT:    global_store_dword v2, v0, s[2:3]
; GCN-FLUSH-NEXT:    s_endpgm
  %load = load float, ptr addrspace(1) %arg, align 4
  %div = fdiv float %num, %load, !fpmath !0
  store float %div, ptr addrspace(1) %arg, align 4
  ret void
}

; 1.0 / x with `fast` on the fdiv (plus !fpmath !0); both runs share one
; set of assertions.
define amdgpu_kernel void @div_1_by_x_fast(ptr addrspace(1) %arg) {
; GCN-LABEL: div_1_by_x_fast:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_load_dword s2, s[0:1], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_rcp_f32_e32 v0, s2
; GCN-NEXT:    global_store_dword v1, v0, s[0:1]
; GCN-NEXT:    s_endpgm
  %load = load float, ptr addrspace(1) %arg, align 4
  %div = fdiv fast float 1.000000e+00, %load, !fpmath !0
  store float %div, ptr addrspace(1) %arg, align 4
  ret void
}

; -1.0 / x with `fast` on the fdiv (plus !fpmath !0).
define amdgpu_kernel void @div_minus_1_by_x_fast(ptr addrspace(1) %arg) {
; GCN-DENORM-LABEL: div_minus_1_by_x_fast:
; GCN-DENORM:       ; %bb.0:
; GCN-DENORM-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-DENORM-NEXT:    v_mov_b32_e32 v1, 0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    s_load_dword s2, s[0:1], 0x0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    v_rcp_f32_e64 v0, -s2
; GCN-DENORM-NEXT:    global_store_dword v1, v0, s[0:1]
; GCN-DENORM-NEXT:    s_endpgm
;
; GCN-FLUSH-LABEL: div_minus_1_by_x_fast:
; GCN-FLUSH:       ; %bb.0:
; GCN-FLUSH-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v1, 0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    s_load_dword s2, s[0:1], 0x0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v0, s2
; GCN-FLUSH-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
; GCN-FLUSH-NEXT:    global_store_dword v1, v0, s[0:1]
; GCN-FLUSH-NEXT:    s_endpgm
  %load = load float, ptr addrspace(1) %arg, align 4
  %div = fdiv fast float -1.000000e+00, %load, !fpmath !0
  store float %div, ptr addrspace(1) %arg, align 4
  ret void
}

; 1.0 / (fneg x) with `fast` on the fdiv.
; NOTE(review): !fpmath !0 is attached to the fneg here, not to the fdiv as
; in the two tests above - confirm this placement is intended.
define amdgpu_kernel void @div_1_by_minus_x_fast(ptr addrspace(1) %arg) {
; GCN-LABEL: div_1_by_minus_x_fast:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_load_dword s2, s[0:1], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_rcp_f32_e64 v0, -s2
; GCN-NEXT:    global_store_dword v1, v0, s[0:1]
; GCN-NEXT:    s_endpgm
  %load = load float, ptr addrspace(1) %arg, align 4
  %neg = fneg float %load, !fpmath !0
  %div = fdiv fast float 1.000000e+00, %neg
  store float %div, ptr addrspace(1) %arg, align 4
  ret void
}

; -1.0 / (-0.0 - x) with `fast` on the fdiv.
; NOTE(review): !fpmath !0 is attached to the fsub here, not to the fdiv -
; confirm this placement is intended.
define amdgpu_kernel void @div_minus_1_by_minus_x_fast(ptr addrspace(1) %arg) {
; GCN-DENORM-LABEL: div_minus_1_by_minus_x_fast:
; GCN-DENORM:       ; %bb.0:
; GCN-DENORM-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-DENORM-NEXT:    v_mov_b32_e32 v1, 0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    s_load_dword s2, s[0:1], 0x0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v0, s2
; GCN-DENORM-NEXT:    global_store_dword v1, v0, s[0:1]
; GCN-DENORM-NEXT:    s_endpgm
;
; GCN-FLUSH-LABEL: div_minus_1_by_minus_x_fast:
; GCN-FLUSH:       ; %bb.0:
; GCN-FLUSH-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v1, 0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    s_load_dword s2, s[0:1], 0x0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    v_rcp_f32_e64 v0, -s2
; GCN-FLUSH-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
; GCN-FLUSH-NEXT:    global_store_dword v1, v0, s[0:1]
; GCN-FLUSH-NEXT:    s_endpgm
  %load = load float, ptr addrspace(1) %arg, align 4
  %neg = fsub float -0.000000e+00, %load, !fpmath !0
  %div = fdiv fast float -1.000000e+00, %neg
  store float %div, ptr addrspace(1) %arg, align 4
  ret void
}

; 1.0 / x with no !fpmath and no fast-math flags; the assertions expect the
; full v_div_scale / v_fma / v_div_fmas / v_div_fixup sequence.
define amdgpu_kernel void @div_1_by_x_correctly_rounded(ptr addrspace(1) %arg) {
; GCN-DENORM-LABEL: div_1_by_x_correctly_rounded:
; GCN-DENORM:       ; %bb.0:
; GCN-DENORM-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    s_load_dword s4, s[0:1], 0x0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    v_div_scale_f32 v0, s[2:3], s4, s4, 1.0
; GCN-DENORM-NEXT:    v_div_scale_f32 v1, vcc, 1.0, s4, 1.0
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v2, v0
; GCN-DENORM-NEXT:    v_fma_f32 v3, -v0, v2, 1.0
; GCN-DENORM-NEXT:    v_fma_f32 v2, v3, v2, v2
; GCN-DENORM-NEXT:    v_mul_f32_e32 v3, v1, v2
; GCN-DENORM-NEXT:    v_fma_f32 v4, -v0, v3, v1
; GCN-DENORM-NEXT:    v_fma_f32 v3, v4, v2, v3
; GCN-DENORM-NEXT:    v_fma_f32 v0, -v0, v3, v1
; GCN-DENORM-NEXT:    v_div_fmas_f32 v0, v0, v2, v3
; GCN-DENORM-NEXT:    v_mov_b32_e32 v1, 0
; GCN-DENORM-NEXT:    v_div_fixup_f32 v0, v0, s4, 1.0
; GCN-DENORM-NEXT:    global_store_dword v1, v0, s[0:1]
; GCN-DENORM-NEXT:    s_endpgm
;
; GCN-FLUSH-LABEL: div_1_by_x_correctly_rounded:
; GCN-FLUSH:       ; %bb.0:
; GCN-FLUSH-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    s_load_dword s4, s[0:1], 0x0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    v_div_scale_f32 v0, s[2:3], s4, s4, 1.0
; GCN-FLUSH-NEXT:    v_div_scale_f32 v1, vcc, 1.0, s4, 1.0
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v2, v0
; GCN-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GCN-FLUSH-NEXT:    v_fma_f32 v3, -v0, v2, 1.0
; GCN-FLUSH-NEXT:    v_fma_f32 v2, v3, v2, v2
; GCN-FLUSH-NEXT:    v_mul_f32_e32 v3, v1, v2
; GCN-FLUSH-NEXT:    v_fma_f32 v4, -v0, v3, v1
; GCN-FLUSH-NEXT:    v_fma_f32 v3, v4, v2, v3
; GCN-FLUSH-NEXT:    v_fma_f32 v0, -v0, v3, v1
; GCN-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GCN-FLUSH-NEXT:    v_div_fmas_f32 v0, v0, v2, v3
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v1, 0
; GCN-FLUSH-NEXT:    v_div_fixup_f32 v0, v0, s4, 1.0
; GCN-FLUSH-NEXT:    global_store_dword v1, v0, s[0:1]
; GCN-FLUSH-NEXT:    s_endpgm
  %load = load float, ptr addrspace(1) %arg, align 4
  %div = fdiv float 1.000000e+00, %load
  store float %div, ptr addrspace(1) %arg, align 4
  ret void
}

; -1.0 / x with no !fpmath and no fast-math flags.
define amdgpu_kernel void @div_minus_1_by_x_correctly_rounded(ptr addrspace(1) %arg) {
; GCN-DENORM-LABEL: div_minus_1_by_x_correctly_rounded:
; GCN-DENORM:       ; %bb.0:
; GCN-DENORM-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    s_load_dword s4, s[0:1], 0x0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    v_div_scale_f32 v0, s[2:3], s4, s4, -1.0
; GCN-DENORM-NEXT:    v_div_scale_f32 v1, vcc, -1.0, s4, -1.0
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v2, v0
; GCN-DENORM-NEXT:    v_fma_f32 v3, -v0, v2, 1.0
; GCN-DENORM-NEXT:    v_fma_f32 v2, v3, v2, v2
; GCN-DENORM-NEXT:    v_mul_f32_e32 v3, v1, v2
; GCN-DENORM-NEXT:    v_fma_f32 v4, -v0, v3, v1
; GCN-DENORM-NEXT:    v_fma_f32 v3, v4, v2, v3
; GCN-DENORM-NEXT:    v_fma_f32 v0, -v0, v3, v1
; GCN-DENORM-NEXT:    v_div_fmas_f32 v0, v0, v2, v3
; GCN-DENORM-NEXT:    v_mov_b32_e32 v1, 0
; GCN-DENORM-NEXT:    v_div_fixup_f32 v0, v0, s4, -1.0
; GCN-DENORM-NEXT:    global_store_dword v1, v0, s[0:1]
; GCN-DENORM-NEXT:    s_endpgm
;
; GCN-FLUSH-LABEL: div_minus_1_by_x_correctly_rounded:
; GCN-FLUSH:       ; %bb.0:
; GCN-FLUSH-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    s_load_dword s4, s[0:1], 0x0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    v_div_scale_f32 v0, s[2:3], s4, s4, -1.0
; GCN-FLUSH-NEXT:    v_div_scale_f32 v1, vcc, -1.0, s4, -1.0
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v2, v0
; GCN-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GCN-FLUSH-NEXT:    v_fma_f32 v3, -v0, v2, 1.0
; GCN-FLUSH-NEXT:    v_fma_f32 v2, v3, v2, v2
; GCN-FLUSH-NEXT:    v_mul_f32_e32 v3, v1, v2
; GCN-FLUSH-NEXT:    v_fma_f32 v4, -v0, v3, v1
; GCN-FLUSH-NEXT:    v_fma_f32 v3, v4, v2, v3
; GCN-FLUSH-NEXT:    v_fma_f32 v0, -v0, v3, v1
; GCN-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GCN-FLUSH-NEXT:    v_div_fmas_f32 v0, v0, v2, v3
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v1, 0
; GCN-FLUSH-NEXT:    v_div_fixup_f32 v0, v0, s4, -1.0
; GCN-FLUSH-NEXT:    global_store_dword v1, v0, s[0:1]
; GCN-FLUSH-NEXT:    s_endpgm
  %load = load float, ptr addrspace(1) %arg, align 4
  %div = fdiv float -1.000000e+00, %load
  store float %div, ptr addrspace(1) %arg, align 4
  ret void
}

; 1.0 / (-0.0 - x) with no !fpmath and no fast-math flags.
define amdgpu_kernel void @div_1_by_minus_x_correctly_rounded(ptr addrspace(1) %arg) {
; GCN-DENORM-LABEL: div_1_by_minus_x_correctly_rounded:
; GCN-DENORM:       ; %bb.0:
; GCN-DENORM-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    s_load_dword s4, s[0:1], 0x0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    v_div_scale_f32 v0, s[2:3], s4, s4, -1.0
; GCN-DENORM-NEXT:    v_div_scale_f32 v1, vcc, -1.0, s4, -1.0
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v2, v0
; GCN-DENORM-NEXT:    v_fma_f32 v3, -v0, v2, 1.0
; GCN-DENORM-NEXT:    v_fma_f32 v2, v3, v2, v2
; GCN-DENORM-NEXT:    v_mul_f32_e32 v3, v1, v2
; GCN-DENORM-NEXT:    v_fma_f32 v4, -v0, v3, v1
; GCN-DENORM-NEXT:    v_fma_f32 v3, v4, v2, v3
; GCN-DENORM-NEXT:    v_fma_f32 v0, -v0, v3, v1
; GCN-DENORM-NEXT:    v_div_fmas_f32 v0, v0, v2, v3
; GCN-DENORM-NEXT:    v_mov_b32_e32 v1, 0
; GCN-DENORM-NEXT:    v_div_fixup_f32 v0, v0, s4, -1.0
; GCN-DENORM-NEXT:    global_store_dword v1, v0, s[0:1]
; GCN-DENORM-NEXT:    s_endpgm
;
; GCN-FLUSH-LABEL: div_1_by_minus_x_correctly_rounded:
; GCN-FLUSH:       ; %bb.0:
; GCN-FLUSH-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    s_load_dword s4, s[0:1], 0x0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    v_div_scale_f32 v0, s[2:3], -s4, -s4, 1.0
; GCN-FLUSH-NEXT:    v_div_scale_f32 v1, vcc, 1.0, -s4, 1.0
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v2, v0
; GCN-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GCN-FLUSH-NEXT:    v_fma_f32 v3, -v0, v2, 1.0
; GCN-FLUSH-NEXT:    v_fma_f32 v2, v3, v2, v2
; GCN-FLUSH-NEXT:    v_mul_f32_e32 v3, v1, v2
; GCN-FLUSH-NEXT:    v_fma_f32 v4, -v0, v3, v1
; GCN-FLUSH-NEXT:    v_fma_f32 v3, v4, v2, v3
; GCN-FLUSH-NEXT:    v_fma_f32 v0, -v0, v3, v1
; GCN-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GCN-FLUSH-NEXT:    v_div_fmas_f32 v0, v0, v2, v3
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v1, 0
; GCN-FLUSH-NEXT:    v_div_fixup_f32 v0, v0, -s4, 1.0
; GCN-FLUSH-NEXT:    global_store_dword v1, v0, s[0:1]
; GCN-FLUSH-NEXT:    s_endpgm
  %load = load float, ptr addrspace(1) %arg, align 4
  %neg = fsub float -0.000000e+00, %load
  %div = fdiv float 1.000000e+00, %neg
  store float %div, ptr addrspace(1) %arg, align 4
  ret void
}

; -1.0 / (-0.0 - x) with no !fpmath and no fast-math flags.
define amdgpu_kernel void @div_minus_1_by_minus_x_correctly_rounded(ptr addrspace(1) %arg) {
; GCN-DENORM-LABEL: div_minus_1_by_minus_x_correctly_rounded:
; GCN-DENORM:       ; %bb.0:
; GCN-DENORM-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    s_load_dword s4, s[0:1], 0x0
; GCN-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DENORM-NEXT:    v_div_scale_f32 v0, s[2:3], s4, s4, 1.0
; GCN-DENORM-NEXT:    v_div_scale_f32 v1, vcc, 1.0, s4, 1.0
; GCN-DENORM-NEXT:    v_rcp_f32_e32 v2, v0
; GCN-DENORM-NEXT:    v_fma_f32 v3, -v0, v2, 1.0
; GCN-DENORM-NEXT:    v_fma_f32 v2, v3, v2, v2
; GCN-DENORM-NEXT:    v_mul_f32_e32 v3, v1, v2
; GCN-DENORM-NEXT:    v_fma_f32 v4, -v0, v3, v1
; GCN-DENORM-NEXT:    v_fma_f32 v3, v4, v2, v3
; GCN-DENORM-NEXT:    v_fma_f32 v0, -v0, v3, v1
; GCN-DENORM-NEXT:    v_div_fmas_f32 v0, v0, v2, v3
; GCN-DENORM-NEXT:    v_mov_b32_e32 v1, 0
; GCN-DENORM-NEXT:    v_div_fixup_f32 v0, v0, s4, 1.0
; GCN-DENORM-NEXT:    global_store_dword v1, v0, s[0:1]
; GCN-DENORM-NEXT:    s_endpgm
;
; GCN-FLUSH-LABEL: div_minus_1_by_minus_x_correctly_rounded:
; GCN-FLUSH:       ; %bb.0:
; GCN-FLUSH-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    s_load_dword s4, s[0:1], 0x0
; GCN-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-FLUSH-NEXT:    v_div_scale_f32 v0, s[2:3], -s4, -s4, -1.0
; GCN-FLUSH-NEXT:    v_div_scale_f32 v1, vcc, -1.0, -s4, -1.0
; GCN-FLUSH-NEXT:    v_rcp_f32_e32 v2, v0
; GCN-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GCN-FLUSH-NEXT:    v_fma_f32 v3, -v0, v2, 1.0
; GCN-FLUSH-NEXT:    v_fma_f32 v2, v3, v2, v2
; GCN-FLUSH-NEXT:    v_mul_f32_e32 v3, v1, v2
; GCN-FLUSH-NEXT:    v_fma_f32 v4, -v0, v3, v1
; GCN-FLUSH-NEXT:    v_fma_f32 v3, v4, v2, v3
; GCN-FLUSH-NEXT:    v_fma_f32 v0, -v0, v3, v1
; GCN-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GCN-FLUSH-NEXT:    v_div_fmas_f32 v0, v0, v2, v3
; GCN-FLUSH-NEXT:    v_mov_b32_e32 v1, 0
; GCN-FLUSH-NEXT:    v_div_fixup_f32 v0, v0, -s4, -1.0
; GCN-FLUSH-NEXT:    global_store_dword v1, v0, s[0:1]
; GCN-FLUSH-NEXT:    s_endpgm
  %load = load float, ptr addrspace(1) %arg, align 4
  %neg = fsub float -0.000000e+00, %load
  %div = fdiv float -1.000000e+00, %neg
  store float %div, ptr addrspace(1) %arg, align 4
  ret void
}

; Accuracy bound referenced by every `!fpmath !0` above: 2.5 ULP.
!0 = !{float 2.500000e+00}