Mercurial > hg > CbC > CbC_llvm

diff test/Transforms/SROA/big-endian.ll @ 95:afa8332a0e37 LLVM3.8
LLVM 3.8
author: Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date: Tue, 13 Oct 2015 17:48:58 +0900
parents: 95c75e76d11b
children: 3a76565eade5
--- a/test/Transforms/SROA/big-endian.ll	Wed Feb 18 14:56:07 2015 +0900
+++ b/test/Transforms/SROA/big-endian.ll	Tue Oct 13 17:48:58 2015 +0900
@@ -1,5 +1,4 @@
 ; RUN: opt < %s -sroa -S | FileCheck %s
-; RUN: opt < %s -sroa -force-ssa-updater -S | FileCheck %s
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
@@ -16,14 +15,14 @@
   %b = alloca [3 x i8]
 ; CHECK-NOT: alloca
 
-  %a0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
+  %a0ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 0
   store i8 0, i8* %a0ptr
-  %a1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
+  %a1ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 1
   store i8 0, i8* %a1ptr
-  %a2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
+  %a2ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 2
   store i8 0, i8* %a2ptr
   %aiptr = bitcast [3 x i8]* %a to i24*
-  %ai = load i24* %aiptr
+  %ai = load i24, i24* %aiptr
 ; CHECK-NOT: store
 ; CHECK-NOT: load
 ; CHECK:      %[[ext2:.*]] = zext i8 0 to i24
@@ -40,12 +39,12 @@
 
   %biptr = bitcast [3 x i8]* %b to i24*
   store i24 %ai, i24* %biptr
-  %b0ptr = getelementptr [3 x i8]* %b, i64 0, i32 0
-  %b0 = load i8* %b0ptr
-  %b1ptr = getelementptr [3 x i8]* %b, i64 0, i32 1
-  %b1 = load i8* %b1ptr
-  %b2ptr = getelementptr [3 x i8]* %b, i64 0, i32 2
-  %b2 = load i8* %b2ptr
+  %b0ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 0
+  %b0 = load i8, i8* %b0ptr
+  %b1ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 1
+  %b1 = load i8, i8* %b1ptr
+  %b2ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 2
+  %b2 = load i8, i8* %b2ptr
 ; CHECK-NOT: store
 ; CHECK-NOT: load
 ; CHECK:      %[[shift0:.*]] = lshr i24 %[[insert0]], 16
@@ -72,10 +71,10 @@
   %a = alloca [7 x i8]
 ; CHECK-NOT: alloca
 
-  %a0ptr = getelementptr [7 x i8]* %a, i64 0, i32 0
-  %a1ptr = getelementptr [7 x i8]* %a, i64 0, i32 1
-  %a2ptr = getelementptr [7 x i8]* %a, i64 0, i32 2
-  %a3ptr = getelementptr [7 x i8]* %a, i64 0, i32 3
+  %a0ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 0
+  %a1ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 1
+  %a2ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 2
+  %a3ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 3
 
 ; CHECK-NOT: store
 ; CHECK-NOT: load
@@ -102,7 +101,7 @@
 ; CHECK-NOT: load
 
   %aiptr = bitcast [7 x i8]* %a to i56*
-  %ai = load i56* %aiptr
+  %ai = load i56, i56* %aiptr
   %ret = zext i56 %ai to i64
   ret i64 %ret
 ; CHECK-NEXT: %[[ext4:.*]] = zext i16 1 to i56
@@ -112,3 +111,126 @@
 ; CHECK-NEXT: %[[ret:.*]] = zext i56 %[[insert4]] to i64
 ; CHECK-NEXT: ret i64 %[[ret]]
 }
+
+define i64 @PR14132(i1 %flag) {
+; CHECK-LABEL: @PR14132(
+; SROA promotes the pointer alloca (%ptr) first, which forms a PHI node, and
+; then, in order to promote the other two allocas, speculates the load through
+; that now-PHI pointer. In doing so we end up loading a 64-bit value from an i8
+; alloca. While this is a bit dubious, we used to assert when trying to
+; rewrite it. The trick is that the code using the value may carefully take
+; steps to only use the not-undef bits, and so we need to at least loosely
+; support this. This test is particularly interesting because how we handle
+; a load of an i64 from an i8 alloca is dependent on endianness.
+entry:
+  %a = alloca i64, align 8
+  %b = alloca i8, align 8
+  %ptr = alloca i64*, align 8
+; CHECK-NOT: alloca
+
+  %ptr.cast = bitcast i64** %ptr to i8**  ; lets an i8* be stored into the i64* slot
+  store i64 0, i64* %a
+  store i8 1, i8* %b  ; big-endian: expected to end up as the top byte of the i64 (shl by 56 below)
+  store i64* %a, i64** %ptr  ; default: %ptr refers to the i64 alloca
+  br i1 %flag, label %if.then, label %if.end
+
+if.then:
+  store i8* %b, i8** %ptr.cast  ; when %flag is set, %ptr refers to the i8 alloca instead
+  br label %if.end
+; CHECK-NOT: store
+; CHECK: %[[ext:.*]] = zext i8 1 to i64
+; CHECK: %[[shift:.*]] = shl i64 %[[ext]], 56
+
+if.end:
+  %tmp = load i64*, i64** %ptr
+  %result = load i64, i64* %tmp  ; i64 load that may go through the 1-byte %b alloca
+; CHECK-NOT: load
+; CHECK: %[[result:.*]] = phi i64 [ %[[shift]], %if.then ], [ 0, %entry ]
+
+  ret i64 %result
+; CHECK-NEXT: ret i64 %[[result]]
+}
+
+declare void @f(i64 %x, i32 %y)
+
+define void @test3() {
+; CHECK-LABEL: @test3(
+;
+; This is a test that specifically exercises the big-endian lowering because it
+; ends up splitting a 64-bit integer into two smaller integers and has a number
+; of tricky aspects (the i24 type) that make that hard. Historically, SROA
+; would miscompile this by either dropping a most significant byte or least
+; significant byte due to shrinking the [4,8) slice to an i24, or by failing to
+; move the bytes around correctly.
+;
+; The magical number 34494054408 (0x808018008) has bits set in various bytes
+; so that it is clear if any of those bytes fail to be propagated.
+;
+; If you're debugging this, rather than using the direct magical numbers, run
+; the IR through '-sroa -instcombine'. With '-instcombine' these will be
+; constant folded, and if the i64 doesn't round-trip correctly, you've found
+; a bug!
+;
+entry:
+  %a = alloca { i32, i24 }, align 4
+; CHECK-NOT: alloca
+
+  %tmp0 = bitcast { i32, i24 }* %a to i64*  ; access the struct alloca as a single i64
+  store i64 34494054408, i64* %tmp0  ; upper 32 bits = 8, lower 32 bits = 134316040
+  %tmp1 = load i64, i64* %tmp0, align 4  ; reload the full i64; must round-trip unchanged
+  %tmp2 = bitcast { i32, i24 }* %a to i32*  ; same address, viewed as an i32
+  %tmp3 = load i32, i32* %tmp2, align 4  ; big-endian: offset 0 holds the most significant half (8)
+; CHECK: %[[HI_EXT:.*]] = zext i32 134316040 to i64
+; CHECK: %[[HI_INPUT:.*]] = and i64 undef, -4294967296
+; CHECK: %[[HI_MERGE:.*]] = or i64 %[[HI_INPUT]], %[[HI_EXT]]
+; CHECK: %[[LO_EXT:.*]] = zext i32 8 to i64
+; CHECK: %[[LO_SHL:.*]] = shl i64 %[[LO_EXT]], 32
+; CHECK: %[[LO_INPUT:.*]] = and i64 %[[HI_MERGE]], 4294967295
+; CHECK: %[[LO_MERGE:.*]] = or i64 %[[LO_INPUT]], %[[LO_SHL]]
+
+  call void @f(i64 %tmp1, i32 %tmp3)
+; CHECK: call void @f(i64 %[[LO_MERGE]], i32 8)
+  ret void
+; CHECK: ret void
+}
+
+define void @test4() {
+; CHECK-LABEL: @test4(
+;
+; Much like @test3, this is specifically testing big-endian management of data.
+; Also similarly, it uses constants with particular bits set to help track
+; whether values are corrupted, and can be easily evaluated by running through
+; -instcombine to see that the i64 round-trips.
+;
+entry:
+  %a = alloca { i32, i24 }, align 4
+  %a2 = alloca i64, align 4
+; CHECK-NOT: alloca
+
+  store i64 34494054408, i64* %a2  ; same constant as @test3 (0x808018008)
+  %tmp0 = bitcast { i32, i24 }* %a to i8*  ; memcpy destination
+  %tmp1 = bitcast i64* %a2 to i8*  ; memcpy source
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp0, i8* %tmp1, i64 8, i32 4, i1 false)
+; CHECK: %[[LO_SHR:.*]] = lshr i64 34494054408, 32
+; CHECK: %[[LO_START:.*]] = trunc i64 %[[LO_SHR]] to i32
+; CHECK: %[[HI_START:.*]] = trunc i64 34494054408 to i32
+
+  %tmp2 = bitcast { i32, i24 }* %a to i64*  ; access the struct alloca as a single i64
+  %tmp3 = load i64, i64* %tmp2, align 4  ; full i64 reload; must round-trip the stored constant
+  %tmp4 = bitcast { i32, i24 }* %a to i32*  ; same address, viewed as an i32
+  %tmp5 = load i32, i32* %tmp4, align 4  ; expected to be the lshr-by-32 half of the constant
+; CHECK: %[[HI_EXT:.*]] = zext i32 %[[HI_START]] to i64
+; CHECK: %[[HI_INPUT:.*]] = and i64 undef, -4294967296
+; CHECK: %[[HI_MERGE:.*]] = or i64 %[[HI_INPUT]], %[[HI_EXT]]
+; CHECK: %[[LO_EXT:.*]] = zext i32 %[[LO_START]] to i64
+; CHECK: %[[LO_SHL:.*]] = shl i64 %[[LO_EXT]], 32
+; CHECK: %[[LO_INPUT:.*]] = and i64 %[[HI_MERGE]], 4294967295
+; CHECK: %[[LO_MERGE:.*]] = or i64 %[[LO_INPUT]], %[[LO_SHL]]
+
+  call void @f(i64 %tmp3, i32 %tmp5)
+; CHECK: call void @f(i64 %[[LO_MERGE]], i32 %[[LO_START]])
+  ret void
+; CHECK: ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
author	Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date	Tue, 13 Oct 2015 17:48:58 +0900
parents	95c75e76d11b
children	3a76565eade5