Mercurial > hg > CbC > CbC_llvm
view llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll @ 266:00f31e85ec16 default tip
Added tag current for changeset 31d058e83c98
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Sat, 14 Oct 2023 10:13:55 +0900 |
parents | 1f2b6ac9f198 |
children |
line wrap: on
line source
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-amdhsa -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=GCN %s ; RUN: opt -S -si-annotate-control-flow -mtriple=amdgcn-amdhsa -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=SI-OPT %s define hidden void @widget() { ; GCN-LABEL: widget: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_mov_b32 s16, s33 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 16 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: v_writelane_b32 v40, s34, 2 ; GCN-NEXT: v_writelane_b32 v40, s35, 3 ; GCN-NEXT: v_writelane_b32 v40, s36, 4 ; GCN-NEXT: v_writelane_b32 v40, s37, 5 ; GCN-NEXT: v_writelane_b32 v40, s38, 6 ; GCN-NEXT: v_writelane_b32 v40, s39, 7 ; GCN-NEXT: v_writelane_b32 v40, s40, 8 ; GCN-NEXT: v_writelane_b32 v40, s41, 9 ; GCN-NEXT: v_writelane_b32 v40, s42, 10 ; GCN-NEXT: v_writelane_b32 v40, s43, 11 ; GCN-NEXT: v_writelane_b32 v40, s44, 12 ; GCN-NEXT: v_writelane_b32 v40, s45, 13 ; GCN-NEXT: v_writelane_b32 v40, s46, 14 ; GCN-NEXT: v_writelane_b32 v40, s47, 15 ; GCN-NEXT: v_mov_b32_e32 v41, v31 ; GCN-NEXT: s_mov_b32 s42, s15 ; GCN-NEXT: s_mov_b32 s43, s14 ; GCN-NEXT: s_mov_b32 s44, s13 ; GCN-NEXT: s_mov_b32 s45, s12 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] ; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] ; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: flat_load_dword v0, v[0:1] ; GCN-NEXT: s_mov_b64 s[4:5], 0 ; GCN-NEXT: s_mov_b64 s[8:9], -1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 21, v0 ; GCN-NEXT: s_mov_b64 s[46:47], 0 ; GCN-NEXT: s_mov_b64 s[6:7], 0 ; GCN-NEXT: s_cbranch_vccz .LBB0_9 ; GCN-NEXT: ; %bb.1: ; %Flow ; GCN-NEXT: s_andn2_b64 vcc, exec, s[8:9] ; GCN-NEXT: s_cbranch_vccz .LBB0_10 ; GCN-NEXT: .LBB0_2: ; %Flow1 ; GCN-NEXT: s_andn2_b64 vcc, exec, s[6:7] ; GCN-NEXT: s_cbranch_vccnz .LBB0_4 ; GCN-NEXT: .LBB0_3: ; %bb9 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, wibble@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12 ; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] ; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] ; GCN-NEXT: s_mov_b32 s12, s45 ; GCN-NEXT: s_mov_b32 s13, s44 ; GCN-NEXT: s_mov_b32 s14, s43 ; GCN-NEXT: s_mov_b32 s15, s42 ; GCN-NEXT: v_mov_b32_e32 v31, v41 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 ; GCN-NEXT: s_mov_b64 s[4:5], 0 ; GCN-NEXT: s_andn2_b64 s[6:7], s[46:47], exec ; GCN-NEXT: s_and_b64 s[8:9], vcc, exec ; GCN-NEXT: s_or_b64 s[46:47], s[6:7], s[8:9] ; GCN-NEXT: .LBB0_4: ; %Flow2 ; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[46:47] ; GCN-NEXT: s_xor_b64 s[6:7], exec, s[6:7] ; GCN-NEXT: s_cbranch_execz .LBB0_6 ; GCN-NEXT: ; %bb.5: ; %bb12 ; GCN-NEXT: v_mov_b32_e32 v2, 0 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: .LBB0_6: ; %Flow3 ; GCN-NEXT: s_or_b64 exec, exec, s[6:7] ; GCN-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GCN-NEXT: s_cbranch_vccnz .LBB0_8 ; GCN-NEXT: ; %bb.7: ; %bb7 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, wibble@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12 ; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] ; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] ; GCN-NEXT: s_mov_b32 s12, s45 ; GCN-NEXT: s_mov_b32 s13, s44 ; GCN-NEXT: s_mov_b32 s14, s43 ; GCN-NEXT: s_mov_b32 s15, s42 ; GCN-NEXT: v_mov_b32_e32 v31, v41 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: .LBB0_8: ; %UnifiedReturnBlock ; GCN-NEXT: v_readlane_b32 s47, v40, 15 ; GCN-NEXT: v_readlane_b32 s46, v40, 14 ; GCN-NEXT: v_readlane_b32 s45, v40, 13 ; GCN-NEXT: v_readlane_b32 s44, v40, 12 ; GCN-NEXT: v_readlane_b32 s43, v40, 11 ; GCN-NEXT: v_readlane_b32 s42, v40, 10 ; GCN-NEXT: v_readlane_b32 s41, v40, 9 ; GCN-NEXT: v_readlane_b32 s40, v40, 8 ; GCN-NEXT: v_readlane_b32 s39, v40, 7 ; GCN-NEXT: v_readlane_b32 s38, v40, 6 ; GCN-NEXT: v_readlane_b32 s37, v40, 5 ; GCN-NEXT: v_readlane_b32 s36, v40, 4 ; GCN-NEXT: v_readlane_b32 s35, v40, 3 ; GCN-NEXT: v_readlane_b32 s34, v40, 2 ; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: v_readlane_b32 s4, v40, 16 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] ; GCN-NEXT: .LBB0_9: ; %bb2 ; GCN-NEXT: v_cmp_eq_u32_e64 s[46:47], 21, v0 ; GCN-NEXT: v_cmp_ne_u32_e64 s[6:7], 21, v0 ; GCN-NEXT: s_mov_b64 vcc, exec ; GCN-NEXT: s_cbranch_execnz .LBB0_2 ; GCN-NEXT: .LBB0_10: ; %bb4 ; GCN-NEXT: s_mov_b64 s[4:5], -1 ; GCN-NEXT: v_cmp_ne_u32_e64 s[6:7], 9, v0 ; GCN-NEXT: s_andn2_b64 vcc, exec, s[6:7] ; GCN-NEXT: s_cbranch_vccz .LBB0_3 ; GCN-NEXT: s_branch .LBB0_4 ; SI-OPT-LABEL: @widget( ; SI-OPT-NEXT: bb: ; SI-OPT-NEXT: [[TMP:%.*]] = load i32, ptr addrspace(1) null, align 16 ; SI-OPT-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP]], 21 ; SI-OPT-NEXT: br i1 [[TMP1]], label [[BB4:%.*]], label [[BB2:%.*]] ; SI-OPT: bb2: ; SI-OPT-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP]], 21 ; SI-OPT-NEXT: br i1 [[TMP3]], label [[BB12:%.*]], label [[BB9:%.*]] ; SI-OPT: bb4: ; SI-OPT-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP]], 9 ; SI-OPT-NEXT: br i1 [[TMP5]], label [[BB7:%.*]], label [[BB9]] ; SI-OPT: bb6: ; SI-OPT-NEXT: ret void ; SI-OPT: bb7: ; SI-OPT-NEXT: [[TMP8:%.*]] = call float @wibble() ; SI-OPT-NEXT: ret void ; SI-OPT: bb9: ; SI-OPT-NEXT: [[TMP10:%.*]] = call float @wibble() ; SI-OPT-NEXT: [[TMP11:%.*]] = fcmp nsz ogt float [[TMP10]], 0.000000e+00 ; SI-OPT-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP11]]) ; SI-OPT-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0 ; SI-OPT-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1 ; SI-OPT-NEXT: br i1 [[TMP1]], label [[BB6:%.*]], label [[BB9_BB12_CRIT_EDGE:%.*]] ; SI-OPT: bb9.bb12_crit_edge: ; SI-OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]]) ; SI-OPT-NEXT: br label [[BB12]] ; SI-OPT: bb12: ; SI-OPT-NEXT: store float 0.000000e+00, ptr addrspace(1) null, align 8 ; SI-OPT-NEXT: ret void bb: %tmp = load i32, ptr addrspace(1) null, align 16 %tmp1 = icmp slt i32 %tmp, 21 br i1 %tmp1, label %bb4, label %bb2 bb2: ; preds = %bb %tmp3 = icmp eq i32 %tmp, 21 br i1 %tmp3, label %bb12, label %bb9 bb4: ; preds = %bb %tmp5 = icmp eq i32 %tmp, 9 br i1 %tmp5, label %bb7, label %bb9 bb6: ; preds = %bb9 ret void bb7: ; preds = %bb4 %tmp8 = call float @wibble() ret void bb9: ; preds = %bb4, %bb2 %tmp10 = call float @wibble() %tmp11 = fcmp nsz ogt float %tmp10, 0.000000e+00 br i1 %tmp11, label %bb6, label %bb12 bb12: ; preds = %bb9, %bb2 store float 0.000000e+00, ptr addrspace(1) null, align 8 ret void } declare hidden float @wibble() local_unnamed_addr define hidden void @blam() { ; SI-OPT-LABEL: @blam( ; SI-OPT-NEXT: bb: ; SI-OPT-NEXT: [[TMP:%.*]] = load float, ptr null, align 16 ; SI-OPT-NEXT: br label [[BB2:%.*]] ; SI-OPT: bb1: ; SI-OPT-NEXT: br label [[BB2]] ; SI-OPT: bb2: ; SI-OPT-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() ; SI-OPT-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(1) null, i32 [[TID]] ; SI-OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 16 ; SI-OPT-NEXT: store float 0.000000e+00, ptr addrspace(5) null, align 8 ; SI-OPT-NEXT: br label [[BB4:%.*]] ; SI-OPT: bb4: ; SI-OPT-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP3]], 3 ; SI-OPT-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP5]]) ; SI-OPT-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0 ; SI-OPT-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1 ; SI-OPT-NEXT: br i1 [[TMP1]], label [[BB8:%.*]], label [[BB6:%.*]] ; SI-OPT: bb6: ; SI-OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]]) ; SI-OPT-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP3]], 3 ; SI-OPT-NEXT: br i1 [[TMP7]], label [[BB11:%.*]], label [[BB1:%.*]] ; SI-OPT: bb8: ; SI-OPT-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP3]], 1 ; SI-OPT-NEXT: [[TMP3:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP9]]) ; SI-OPT-NEXT: [[TMP4:%.*]] = extractvalue { i1, i64 } [[TMP3]], 0 ; SI-OPT-NEXT: [[TMP5:%.*]] = extractvalue { i1, i64 } [[TMP3]], 1 ; SI-OPT-NEXT: br i1 [[TMP4]], label [[BB10:%.*]], label [[BB8_BB1_CRIT_EDGE:%.*]] ; SI-OPT: bb8.bb1_crit_edge: ; SI-OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP5]]) ; SI-OPT-NEXT: br label [[BB1]] ; SI-OPT: bb10: ; SI-OPT-NEXT: store float 0x7FF8000000000000, ptr addrspace(5) null, align 16 ; SI-OPT-NEXT: br label [[BB18:%.*]] ; SI-OPT: bb11: ; SI-OPT-NEXT: [[TMP12:%.*]] = call float @spam() ; SI-OPT-NEXT: [[TMP13:%.*]] = fcmp nsz oeq float [[TMP12]], 0.000000e+00 ; SI-OPT-NEXT: [[TMP6:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP13]]) ; SI-OPT-NEXT: [[TMP7:%.*]] = extractvalue { i1, i64 } [[TMP6]], 0 ; SI-OPT-NEXT: [[TMP8:%.*]] = extractvalue { i1, i64 } [[TMP6]], 1 ; SI-OPT-NEXT: br i1 [[TMP7]], label [[BB2]], label [[BB14:%.*]] ; SI-OPT: bb14: ; SI-OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP8]]) ; SI-OPT-NEXT: [[TMP15:%.*]] = fcmp nsz oeq float [[TMP]], 0.000000e+00 ; SI-OPT-NEXT: [[TMP9:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP15]]) ; SI-OPT-NEXT: [[TMP10:%.*]] = extractvalue { i1, i64 } [[TMP9]], 0 ; SI-OPT-NEXT: [[TMP11:%.*]] = extractvalue { i1, i64 } [[TMP9]], 1 ; SI-OPT-NEXT: br i1 [[TMP10]], label [[BB17:%.*]], label [[BB16:%.*]] ; SI-OPT: bb16: ; SI-OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP11]]) ; SI-OPT-NEXT: store float 0x7FF8000000000000, ptr addrspace(5) null, align 16 ; SI-OPT-NEXT: br label [[BB17]] ; SI-OPT: bb17: ; SI-OPT-NEXT: store float [[TMP]], ptr addrspace(5) null, align 16 ; SI-OPT-NEXT: br label [[BB18]] ; SI-OPT: bb18: ; SI-OPT-NEXT: store float 0x7FF8000000000000, ptr addrspace(5) null, align 4 ; SI-OPT-NEXT: br label [[BB2]] ; ; GCN-LABEL: blam: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_mov_b32 s16, s33 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 28 ; GCN-NEXT: s_addk_i32 s32, 0x800 ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: v_writelane_b32 v40, s34, 2 ; GCN-NEXT: v_writelane_b32 v40, s35, 3 ; GCN-NEXT: v_writelane_b32 v40, s36, 4 ; GCN-NEXT: v_writelane_b32 v40, s37, 5 ; GCN-NEXT: v_writelane_b32 v40, s38, 6 ; GCN-NEXT: v_writelane_b32 v40, s39, 7 ; GCN-NEXT: v_writelane_b32 v40, s40, 8 ; GCN-NEXT: v_writelane_b32 v40, s41, 9 ; GCN-NEXT: v_writelane_b32 v40, s42, 10 ; GCN-NEXT: v_writelane_b32 v40, s43, 11 ; GCN-NEXT: v_writelane_b32 v40, s44, 12 ; GCN-NEXT: v_writelane_b32 v40, s45, 13 ; GCN-NEXT: v_writelane_b32 v40, s46, 14 ; GCN-NEXT: v_writelane_b32 v40, s47, 15 ; GCN-NEXT: v_writelane_b32 v40, s48, 16 ; GCN-NEXT: v_writelane_b32 v40, s49, 17 ; GCN-NEXT: v_writelane_b32 v40, s50, 18 ; GCN-NEXT: v_writelane_b32 v40, s51, 19 ; GCN-NEXT: v_writelane_b32 v40, s52, 20 ; GCN-NEXT: v_writelane_b32 v40, s53, 21 ; GCN-NEXT: v_writelane_b32 v40, s54, 22 ; GCN-NEXT: v_writelane_b32 v40, s55, 23 ; GCN-NEXT: v_writelane_b32 v40, s56, 24 ; GCN-NEXT: v_writelane_b32 v40, s57, 25 ; GCN-NEXT: v_writelane_b32 v40, s58, 26 ; GCN-NEXT: v_writelane_b32 v40, s59, 27 ; GCN-NEXT: v_mov_b32_e32 v41, v31 ; GCN-NEXT: s_mov_b32 s46, s15 ; GCN-NEXT: s_mov_b32 s47, s14 ; GCN-NEXT: s_mov_b32 s48, s13 ; GCN-NEXT: s_mov_b32 s49, s12 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] ; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] ; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] ; GCN-NEXT: s_mov_b64 s[50:51], 0 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: v_and_b32_e32 v2, 0x3ff, v41 ; GCN-NEXT: flat_load_dword v44, v[0:1] ; GCN-NEXT: v_mov_b32_e32 v43, 0 ; GCN-NEXT: s_getpc_b64 s[52:53] ; GCN-NEXT: s_add_u32 s52, s52, spam@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s53, s53, spam@rel32@hi+12 ; GCN-NEXT: v_lshlrev_b32_e32 v42, 2, v2 ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_f32_e64 s[54:55], 0, v44 ; GCN-NEXT: v_cmp_neq_f32_e64 s[42:43], 0, v44 ; GCN-NEXT: v_mov_b32_e32 v45, 0x7fc00000 ; GCN-NEXT: s_branch .LBB1_2 ; GCN-NEXT: .LBB1_1: ; %Flow7 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: s_or_b64 exec, exec, s[8:9] ; GCN-NEXT: s_and_b64 s[4:5], exec, s[4:5] ; GCN-NEXT: s_or_b64 s[50:51], s[4:5], s[50:51] ; GCN-NEXT: s_andn2_b64 exec, exec, s[50:51] ; GCN-NEXT: s_cbranch_execz .LBB1_18 ; GCN-NEXT: .LBB1_2: ; %bb2 ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: flat_load_dword v0, v[42:43] ; GCN-NEXT: buffer_store_dword v43, off, s[0:3], 0 ; GCN-NEXT: s_mov_b64 s[4:5], -1 ; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 2, v0 ; GCN-NEXT: s_mov_b64 s[6:7], 0 ; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GCN-NEXT: s_xor_b64 s[56:57], exec, s[8:9] ; GCN-NEXT: s_cbranch_execz .LBB1_12 ; GCN-NEXT: ; %bb.3: ; %bb6 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: v_cmp_eq_u32_e64 s[44:45], 3, v0 ; GCN-NEXT: s_and_saveexec_b64 s[58:59], s[44:45] ; GCN-NEXT: s_cbranch_execz .LBB1_11 ; GCN-NEXT: ; %bb.4: ; %bb11 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] ; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] ; GCN-NEXT: s_mov_b32 s12, s49 ; GCN-NEXT: s_mov_b32 s13, s48 ; GCN-NEXT: s_mov_b32 s14, s47 ; GCN-NEXT: s_mov_b32 s15, s46 ; GCN-NEXT: v_mov_b32_e32 v31, v41 ; GCN-NEXT: s_swappc_b64 s[30:31], s[52:53] ; GCN-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 ; GCN-NEXT: s_mov_b64 s[6:7], 0 ; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GCN-NEXT: s_cbranch_execz .LBB1_10 ; GCN-NEXT: ; %bb.5: ; %bb14 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: s_mov_b64 s[8:9], s[54:55] ; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[42:43] ; GCN-NEXT: s_cbranch_execz .LBB1_7 ; GCN-NEXT: ; %bb.6: ; %bb16 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: buffer_store_dword v45, off, s[0:3], 0 ; GCN-NEXT: s_or_b64 s[8:9], s[54:55], exec ; GCN-NEXT: .LBB1_7: ; %Flow3 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: s_or_b64 exec, exec, s[6:7] ; GCN-NEXT: s_mov_b64 s[6:7], 0 ; GCN-NEXT: s_and_saveexec_b64 s[10:11], s[8:9] ; GCN-NEXT: s_xor_b64 s[8:9], exec, s[10:11] ; GCN-NEXT: s_cbranch_execz .LBB1_9 ; GCN-NEXT: ; %bb.8: ; %bb17 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: s_mov_b64 s[6:7], exec ; GCN-NEXT: buffer_store_dword v44, off, s[0:3], 0 ; GCN-NEXT: .LBB1_9: ; %Flow4 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: s_or_b64 exec, exec, s[8:9] ; GCN-NEXT: s_and_b64 s[6:7], s[6:7], exec ; GCN-NEXT: .LBB1_10: ; %Flow2 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-NEXT: s_andn2_b64 s[4:5], s[44:45], exec ; GCN-NEXT: s_and_b64 s[8:9], vcc, exec ; GCN-NEXT: s_or_b64 s[44:45], s[4:5], s[8:9] ; GCN-NEXT: s_and_b64 s[6:7], s[6:7], exec ; GCN-NEXT: .LBB1_11: ; %Flow1 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: s_or_b64 exec, exec, s[58:59] ; GCN-NEXT: s_orn2_b64 s[4:5], s[44:45], exec ; GCN-NEXT: s_and_b64 s[6:7], s[6:7], exec ; GCN-NEXT: ; implicit-def: $vgpr0 ; GCN-NEXT: .LBB1_12: ; %Flow ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: s_andn2_saveexec_b64 s[8:9], s[56:57] ; GCN-NEXT: s_cbranch_execz .LBB1_16 ; GCN-NEXT: ; %bb.13: ; %bb8 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: s_mov_b64 s[10:11], s[6:7] ; GCN-NEXT: s_and_saveexec_b64 s[12:13], vcc ; GCN-NEXT: s_cbranch_execz .LBB1_15 ; GCN-NEXT: ; %bb.14: ; %bb10 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: buffer_store_dword v45, off, s[0:3], 0 ; GCN-NEXT: s_or_b64 s[10:11], s[6:7], exec ; GCN-NEXT: .LBB1_15: ; %Flow6 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: s_or_b64 exec, exec, s[12:13] ; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec ; GCN-NEXT: s_and_b64 s[12:13], vcc, exec ; GCN-NEXT: s_andn2_b64 s[6:7], s[6:7], exec ; GCN-NEXT: s_and_b64 s[10:11], s[10:11], exec ; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[12:13] ; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] ; GCN-NEXT: .LBB1_16: ; %Flow5 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: s_or_b64 exec, exec, s[8:9] ; GCN-NEXT: s_and_saveexec_b64 s[8:9], s[6:7] ; GCN-NEXT: s_cbranch_execz .LBB1_1 ; GCN-NEXT: ; %bb.17: ; %bb18 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: buffer_store_dword v45, off, s[0:3], 0 ; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec ; GCN-NEXT: s_branch .LBB1_1 ; GCN-NEXT: .LBB1_18: ; %DummyReturnBlock ; GCN-NEXT: s_or_b64 exec, exec, s[50:51] ; GCN-NEXT: v_readlane_b32 s59, v40, 27 ; GCN-NEXT: v_readlane_b32 s58, v40, 26 ; GCN-NEXT: v_readlane_b32 s57, v40, 25 ; GCN-NEXT: v_readlane_b32 s56, v40, 24 ; GCN-NEXT: v_readlane_b32 s55, v40, 23 ; GCN-NEXT: v_readlane_b32 s54, v40, 22 ; GCN-NEXT: v_readlane_b32 s53, v40, 21 ; GCN-NEXT: v_readlane_b32 s52, v40, 20 ; GCN-NEXT: v_readlane_b32 s51, v40, 19 ; GCN-NEXT: v_readlane_b32 s50, v40, 18 ; GCN-NEXT: v_readlane_b32 s49, v40, 17 ; GCN-NEXT: v_readlane_b32 s48, v40, 16 ; GCN-NEXT: v_readlane_b32 s47, v40, 15 ; GCN-NEXT: v_readlane_b32 s46, v40, 14 ; GCN-NEXT: v_readlane_b32 s45, v40, 13 ; GCN-NEXT: v_readlane_b32 s44, v40, 12 ; GCN-NEXT: v_readlane_b32 s43, v40, 11 ; GCN-NEXT: v_readlane_b32 s42, v40, 10 ; GCN-NEXT: v_readlane_b32 s41, v40, 9 ; GCN-NEXT: v_readlane_b32 s40, v40, 8 ; GCN-NEXT: v_readlane_b32 s39, v40, 7 ; GCN-NEXT: v_readlane_b32 s38, v40, 6 ; GCN-NEXT: v_readlane_b32 s37, v40, 5 ; GCN-NEXT: v_readlane_b32 s36, v40, 4 ; GCN-NEXT: v_readlane_b32 s35, v40, 3 ; GCN-NEXT: v_readlane_b32 s34, v40, 2 ; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload ; GCN-NEXT: v_readlane_b32 s4, v40, 28 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_addk_i32 s32, 0xf800 ; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] bb: %tmp = load float, ptr null, align 16 br label %bb2 bb1: ; preds = %bb8, %bb6 br label %bb2 bb2: %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds i32, ptr addrspace(1) null, i32 %tid %tmp3 = load i32, ptr addrspace(1) %gep, align 16 store float 0.000000e+00, ptr addrspace(5) null, align 8 br label %bb4 bb4: ; preds = %bb2 %tmp5 = icmp slt i32 %tmp3, 3 br i1 %tmp5, label %bb8, label %bb6 bb6: ; preds = %bb4 %tmp7 = icmp eq i32 %tmp3, 3 br i1 %tmp7, label %bb11, label %bb1 bb8: ; preds = %bb4 %tmp9 = icmp eq i32 %tmp3, 1 br i1 %tmp9, label %bb10, label %bb1 bb10: ; preds = %bb8 store float 0x7FF8000000000000, ptr addrspace(5) null, align 16 br label %bb18 bb11: ; preds = %bb6 %tmp12 = call float @spam() %tmp13 = fcmp nsz oeq float %tmp12, 0.000000e+00 br i1 %tmp13, label %bb2, label %bb14 bb14: ; preds = %bb11 %tmp15 = fcmp nsz oeq float %tmp, 0.000000e+00 br i1 %tmp15, label %bb17, label %bb16 bb16: ; preds = %bb14 store float 0x7FF8000000000000, ptr addrspace(5) null, align 16 br label %bb17 bb17: ; preds = %bb16, %bb14 store float %tmp, ptr addrspace(5) null, align 16 br label %bb18 bb18: ; preds = %bb17, %bb10 store float 0x7FF8000000000000, ptr addrspace(5) null, align 4 br label %bb2 } declare i32 @llvm.amdgcn.workitem.id.x() declare hidden float @spam()