diff llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp @ 236:c4bab56944e8 llvm-original

LLVM 16
author kono
date Wed, 09 Nov 2022 17:45:10 +0900
parents 79ff65ed7e25
children
line wrap: on
line diff
--- a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp	Wed Jul 21 10:27:27 2021 +0900
+++ b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp	Wed Nov 09 17:45:10 2022 +0900
@@ -41,7 +41,9 @@
 void getSimilarities(
     Module &M,
     std::vector<std::vector<IRSimilarityCandidate>> &SimilarityCandidates) {
-  IRSimilarityIdentifier Identifier;
+  // In order to keep the size of the tests from becoming too large, we do not
+  // recognize similarity for branches unless explicitly needed.
+  IRSimilarityIdentifier Identifier(/*EnableBranchMatching = */false);
   SimilarityCandidates = Identifier.findSimilarity(M);
 }
 
@@ -726,22 +728,8 @@
   ASSERT_TRUE(UnsignedVec[0] != UnsignedVec[1]);
 }
 
-// In most cases, the illegal instructions we are collecting don't require any
-// sort of setup.  In these cases, we can just only have illegal instructions,
-// and the mapper will create 0 length vectors, and we can check that.
-
-// In cases where we have legal instructions needed to set up the illegal
-// instruction, to check illegal instructions are assigned unsigned integers
-// from the maximum value decreasing to 0, it will be greater than a legal
-// instruction that comes after.  So to check that we have an illegal
-// instruction, we place a legal instruction after an illegal instruction, and
-// check that the illegal unsigned integer is greater than the unsigned integer
-// of the legal instruction.
-
-// Checks that the branch is mapped to be illegal since there is extra checking
-// needed to ensure that a branch in one region is branching to an isomorphic
-// location in a different region.
-TEST(IRInstructionMapper, BranchIllegal) {
+// Checks that the branch is mapped to legal when the option is set.
+TEST(IRInstructionMapper, BranchLegal) {
   StringRef ModuleString = R"(
                           define i32 @f(i32 %a, i32 %b) {
                           bb0:
@@ -759,20 +747,23 @@
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableBranches = true;
+  Mapper.initializeForBBs(*M);
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(3));
+  ASSERT_TRUE(UnsignedVec[1] > UnsignedVec[0]);
+  ASSERT_TRUE(UnsignedVec[1] < UnsignedVec[2]);
 }
 
-// Checks that a PHINode is mapped to be illegal since there is extra checking
-// needed to ensure that a branch in one region is bin an isomorphic
-// location in a different region.
-TEST(IRInstructionMapper, PhiIllegal) {
+// Checks that a PHINode is mapped to be legal.
+TEST(IRInstructionMapper, PhiLegal) {
   StringRef ModuleString = R"(
                           define i32 @f(i32 %a, i32 %b) {
                           bb0:
                              %0 = phi i1 [ 0, %bb0 ], [ %0, %bb1 ]
+                             %1 = add i32 %a, %b
                              ret i32 0
                           bb1:
                              ret i32 1
@@ -786,12 +777,54 @@
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableBranches = true;
+  Mapper.initializeForBBs(*M);
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(3));
 }
 
+// Checks that a PHINode is mapped to be legal.
+TEST(IRInstructionMapper, PhiIllegal) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             %0 = phi i1 [ 0, %bb0 ], [ %0, %bb1 ]
+                             %1 = add i32 %a, %b
+                             ret i32 0
+                          bb1:
+                             ret i32 1
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.initializeForBBs(*M);
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(3));
+  ASSERT_GT(UnsignedVec[0], Mapper.IllegalInstrNumber);
+}
+
+// In most cases, the illegal instructions we are collecting don't require any
+// sort of setup.  In these cases, we can just only have illegal instructions,
+// and the mapper will create 0 length vectors, and we can check that.
+
+// In cases where we have legal instructions needed to set up the illegal
+// instruction, to check illegal instructions are assigned unsigned integers
+// from the maximum value decreasing to 0, it will be greater than a legal
+// instruction that comes after.  So to check that we have an illegal
+// instruction, we place a legal instruction after an illegal instruction, and
+// check that the illegal unsigned integer is greater than the unsigned integer
+// of the legal instruction.
+
 // Checks that an alloca instruction is mapped to be illegal.
 TEST(IRInstructionMapper, AllocaIllegal) {
   StringRef ModuleString = R"(
@@ -812,7 +845,8 @@
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(1));
+  ASSERT_GT(UnsignedVec[0], Mapper.IllegalInstrNumber);
 }
 
 // Checks that an getelementptr instruction is mapped to be legal.  And that
@@ -929,8 +963,8 @@
   ASSERT_NE(UnsignedVec[0], UnsignedVec[1]);
 }
 
-// Checks that indirect call instructions are mapped to be illegal since we
-// cannot guarantee the same function in two different cases.
+// Checks that indirect call instructions are mapped to be illegal when it is
+// specified to disallow them.
 TEST(IRInstructionMapper, CallsIllegalIndirect) {
   StringRef ModuleString = R"(
                           define i32 @f(void()* %func) {
@@ -947,10 +981,38 @@
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableIndirectCalls = false;
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(1));
+  ASSERT_GT(UnsignedVec[0], Mapper.IllegalInstrNumber);
+}
+
+// Checks that indirect call instructions are mapped to be legal when it is not
+// specified to disallow them.
+TEST(IRInstructionMapper, CallsLegalIndirect) {
+  StringRef ModuleString = R"(
+                          define i32 @f(void()* %func) {
+                          bb0:
+                             call void %func()
+                             call void %func()
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableIndirectCalls = true;
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(3));
 }
 
 // Checks that a call instruction is mapped to be legal.  Here we check that
@@ -982,7 +1044,36 @@
 }
 
 // Here we check that a calls with different names, but the same arguments types
-// are mapped to different value.
+// are mapped to different value when specified that the name must match.
+TEST(IRInstructionMapper, CallsSameArgTypeDifferentNameDisallowed) {
+  StringRef ModuleString = R"(
+                          declare i32 @f1(i32, i32)
+                          declare i32 @f2(i32, i32)
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             %0 = call i32 @f1(i32 %a, i32 %b)
+                             %1 = call i32 @f2(i32 %a, i32 %b)
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.EnableMatchCallsByName = true;
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(3));
+  ASSERT_NE(UnsignedVec[0], UnsignedVec[1]);
+}
+
+// Here we check that a calls with different names, but the same arguments types
+// are mapped to the same value when it is not specifed that they must match.
 TEST(IRInstructionMapper, CallsSameArgTypeDifferentName) {
   StringRef ModuleString = R"(
                           declare i32 @f1(i32, i32)
@@ -1002,11 +1093,12 @@
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.EnableMatchCallsByName = false;
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
   ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(3));
-  ASSERT_NE(UnsignedVec[0], UnsignedVec[1]);
+  ASSERT_EQ(UnsignedVec[0], UnsignedVec[1]);
 }
 
 // Here we check that a calls with different names, and different arguments
@@ -1176,7 +1268,8 @@
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(1));
+  ASSERT_GT(UnsignedVec[0], Mapper.IllegalInstrNumber);
 }
 
 // Checks that an callbr instructions are considered to be illegal.  Callbr
@@ -1209,7 +1302,8 @@
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(1));
+  ASSERT_GT(UnsignedVec[0], Mapper.IllegalInstrNumber);
 }
 
 // Checks that an debuginfo intrinsics are mapped to be invisible.  Since they
@@ -1267,7 +1361,8 @@
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(1));
+  ASSERT_GT(UnsignedVec[0], Mapper.IllegalInstrNumber);
 }
 
 // Checks that an eh.exceptioncode intrinsic is mapped to be illegal.
@@ -1299,7 +1394,8 @@
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(1));
+  ASSERT_GT(UnsignedVec[0], Mapper.IllegalInstrNumber);
 }
 
 // Checks that an eh.unwind intrinsic is mapped to be illegal.
@@ -1324,7 +1420,8 @@
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(1));
+  ASSERT_GT(UnsignedVec[0], Mapper.IllegalInstrNumber);
 }
 
 // Checks that an eh.exceptionpointer intrinsic is mapped to be illegal.
@@ -1349,7 +1446,8 @@
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(1));
+  ASSERT_GT(UnsignedVec[0], Mapper.IllegalInstrNumber);
 }
 
 // Checks that a catchpad instruction is mapped to an illegal value.
@@ -1380,7 +1478,8 @@
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(1));
+  ASSERT_GT(UnsignedVec[0], Mapper.IllegalInstrNumber);
 }
 
 // Checks that a cleanuppad instruction is mapped to an illegal value.
@@ -1411,14 +1510,16 @@
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(1));
+  ASSERT_GT(UnsignedVec[0], Mapper.IllegalInstrNumber);
 }
 
 // The following three instructions are memory transfer and setting based, which
 // are considered illegal since is extra checking needed to handle the address
 // space checking.
 
-// Checks that a memset instruction is mapped to an illegal value.
+// Checks that a memset instruction is mapped to an illegal value when
+// specified.
 TEST(IRInstructionMapper, MemSetIllegal) {
   StringRef ModuleString = R"(
   declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
@@ -1442,14 +1543,16 @@
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableIntrinsics = false;
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(6));
-  ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]);
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(7));
+  ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[0]);
 }
 
-// Checks that a memcpy instruction is mapped to an illegal value.
+// Checks that a memcpy instruction is mapped to an illegal value  when
+// specified.
 TEST(IRInstructionMapper, MemCpyIllegal) {
   StringRef ModuleString = R"(
   declare void @llvm.memcpy.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
@@ -1473,14 +1576,17 @@
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableIntrinsics = false;
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(6));
-  ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]);
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(7));
+  ASSERT_GT(UnsignedVec[2], UnsignedVec[3]);
+  ASSERT_LT(UnsignedVec[2], UnsignedVec[0]);
 }
 
-// Checks that a memmove instruction is mapped to an illegal value.
+// Checks that a memmove instruction is mapped to an illegal value  when
+// specified.
 TEST(IRInstructionMapper, MemMoveIllegal) {
   StringRef ModuleString = R"(
   declare void @llvm.memmove.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
@@ -1504,11 +1610,51 @@
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableIntrinsics = false;
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(6));
-  ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[1]);
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(7));
+  ASSERT_LT(UnsignedVec[2], UnsignedVec[0]);
+}
+
+// Checks that mem* instructions are mapped to an legal value when not
+// specified, and that all the intrinsics are marked differently.
+TEST(IRInstructionMapper, MemOpsLegal) {
+  StringRef ModuleString = R"(
+  declare void @llvm.memmove.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
+  declare void @llvm.memcpy.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
+  declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
+
+  define i64 @function(i64 %x, i64 %z, i64 %n) {
+  entry:
+    %pool = alloca [59 x i64], align 4
+    %tmp = bitcast [59 x i64]* %pool to i8*
+    call void @llvm.memmove.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
+    call void @llvm.memcpy.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
+    call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
+    %cmp3 = icmp eq i64 %n, 0
+    %a = add i64 %x, %z
+    %c = add i64 %x, %z
+    ret i64 0
+  })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableIntrinsics = true;
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(9));
+  ASSERT_LT(UnsignedVec[2], UnsignedVec[3]);
+  ASSERT_LT(UnsignedVec[3], UnsignedVec[4]);
+  ASSERT_LT(UnsignedVec[4], UnsignedVec[5]);
 }
 
 // Checks that a variable argument instructions are mapped to an illegal value.
@@ -1552,14 +1698,14 @@
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableIntrinsics = false;
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(16));
-  ASSERT_TRUE(UnsignedVec[4] < UnsignedVec[3]);
-  ASSERT_TRUE(UnsignedVec[7] < UnsignedVec[6]);
-  ASSERT_TRUE(UnsignedVec[10] < UnsignedVec[9]);
-  ASSERT_TRUE(UnsignedVec[13] < UnsignedVec[12]);
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(17));
+  ASSERT_TRUE(UnsignedVec[7] < UnsignedVec[0]);
+  ASSERT_TRUE(UnsignedVec[13] < UnsignedVec[10]);
+  ASSERT_TRUE(UnsignedVec[16] < UnsignedVec[13]);
 }
 
 // Check the length of adding two illegal instructions one after th other.  We
@@ -1599,21 +1745,23 @@
 // two blocks of two legal instructions and terminator, and checks them for
 // instruction similarity.
 static bool longSimCandCompare(std::vector<IRInstructionData *> &InstrList,
-                               bool Structure = false) {
+                               bool Structure = false, unsigned Length = 2,
+                               unsigned StartIdxOne = 0,
+                               unsigned StartIdxTwo = 3) {
   std::vector<IRInstructionData *>::iterator Start, End;
 
   Start = InstrList.begin();
   End = InstrList.begin();
 
-  std::advance(End, 1);
-  IRSimilarityCandidate Cand1(0, 2, *Start, *End);
+  std::advance(End, StartIdxOne + Length - 1);
+  IRSimilarityCandidate Cand1(StartIdxOne, Length, *Start, *End);
 
   Start = InstrList.begin();
   End = InstrList.begin();
 
-  std::advance(Start, 3);
-  std::advance(End, 4);
-  IRSimilarityCandidate Cand2(3, 2, *Start, *End);
+  std::advance(Start, StartIdxTwo);
+  std::advance(End, StartIdxTwo + Length - 1);
+  IRSimilarityCandidate Cand2(StartIdxTwo, Length, *Start, *End);
   if (Structure)
     return IRSimilarityCandidate::compareStructure(Cand1, Cand2);
   return IRSimilarityCandidate::isSimilar(Cand1, Cand2);
@@ -1986,6 +2134,70 @@
   ASSERT_TRUE(longSimCandCompare(InstrList, true));
 }
 
+// Checks that the canonical numbering between two candidates matches the found
+// mapping between two candidates.
+TEST(IRSimilarityCandidate, CanonicalNumbering) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             %0 = add i32 %a, %b
+                             %1 = sub i32 %b, %a
+                             ret i32 0
+                          bb1:
+                             %2 = add i32 %a, %b
+                             %3 = sub i32 %b, %a
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  // Check to make sure that we have a long enough region.
+  ASSERT_EQ(InstrList.size(), static_cast<unsigned>(6));
+  // Check that the instructions were added correctly to both vectors.
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+
+  std::vector<IRInstructionData *>::iterator Start, End;
+
+  Start = InstrList.begin();
+  End = InstrList.begin();
+
+  std::advance(End, 1);
+  IRSimilarityCandidate Cand1(0, 2, *Start, *End);
+
+  Start = InstrList.begin();
+  End = InstrList.begin();
+
+  std::advance(Start, 3);
+  std::advance(End, 4);
+  IRSimilarityCandidate Cand2(3, 2, *Start, *End);
+  DenseMap<unsigned, DenseSet<unsigned>> Mapping1;
+  DenseMap<unsigned, DenseSet<unsigned>> Mapping2;
+  ASSERT_TRUE(IRSimilarityCandidate::compareStructure(Cand1, Cand2, Mapping1,
+                                                      Mapping2));
+  IRSimilarityCandidate::createCanonicalMappingFor(Cand1);
+  Cand2.createCanonicalRelationFrom(Cand1, Mapping1, Mapping2);
+
+  for (std::pair<unsigned, DenseSet<unsigned>> &P : Mapping2) {
+    unsigned Source = P.first;
+
+    ASSERT_TRUE(Cand2.getCanonicalNum(Source).has_value());
+    unsigned Canon = *Cand2.getCanonicalNum(Source);
+    ASSERT_TRUE(Cand1.fromCanonicalNum(Canon).has_value());
+    unsigned Dest = *Cand1.fromCanonicalNum(Canon);
+
+    DenseSet<unsigned>::iterator It = P.second.find(Dest);
+    ASSERT_NE(It, P.second.end());
+  }
+}
+
 // Checks that the same structure is recognized between two candidates. While
 // the input names are reversed, they still perform the same overall operation.
 TEST(IRSimilarityCandidate, DifferentNameSameStructure) {
@@ -2019,6 +2231,308 @@
   ASSERT_TRUE(longSimCandCompare(InstrList, true));
 }
 
+// Checks that the same structure is recognized between two candidates when
+// the branches target other blocks inside the same region, the relative
+// distance between the blocks must be the same.
+TEST(IRSimilarityCandidate, SameBranchStructureInternal) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             %0 = add i32 %a, %b
+                             %1 = add i32 %b, %a
+                             br label %bb1
+                          bb1:
+                             %2 = add i32 %b, %a
+                             %3 = add i32 %a, %b
+                             ret i32 0
+                          }
+                          
+                          define i32 @f2(i32 %a, i32 %b) {
+                          bb0:
+                             %0 = add i32 %a, %b
+                             %1 = add i32 %b, %a
+                             br label %bb1
+                          bb1:
+                             %2 = add i32 %b, %a
+                             %3 = add i32 %a, %b
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableBranches = true;
+  Mapper.initializeForBBs(*M);
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  // Check to make sure that we have a long enough region.
+  ASSERT_EQ(InstrList.size(), static_cast<unsigned>(12));
+  // Check that the instructions were added correctly to both vectors.
+  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+  ASSERT_TRUE(longSimCandCompare(InstrList, true, 5, 0, 6));
+}
+
+// Checks that the different structure is recognized between two candidates,
+// when the branches target other blocks inside the same region, the relative
+// distance between the blocks must be the same.
+TEST(IRSimilarityCandidate, DifferentBranchStructureInternal) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             %0 = add i32 %a, %b
+                             %1 = add i32 %b, %a
+                             br label %bb2
+                          bb1:
+                             %2 = add i32 %b, %a
+                             %3 = add i32 %a, %b
+                             br label %bb2
+                          bb2:
+                             %4 = add i32 %b, %a
+                             %5 = add i32 %a, %b
+                             ret i32 0
+                          }
+                          
+                          define i32 @f2(i32 %a, i32 %b) {
+                          bb0:
+                             %0 = add i32 %a, %b
+                             %1 = add i32 %b, %a
+                             br label %bb1
+                          bb1:
+                             %2 = add i32 %b, %a
+                             %3 = add i32 %a, %b
+                             br label %bb2
+                          bb2:
+                             %4 = add i32 %b, %a
+                             %5 = add i32 %a, %b
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableBranches = true;
+  Mapper.initializeForBBs(*M);
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  // Check to make sure that we have a long enough region.
+  ASSERT_EQ(InstrList.size(), static_cast<unsigned>(18));
+  // Check that the instructions were added correctly to both vectors.
+  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+  ASSERT_FALSE(longSimCandCompare(InstrList, true, 6, 0, 9));
+}
+
+// Checks that the same structure is recognized between two candidates, when
+// the branches target other blocks outside region, the relative distance
+// does not need to be the same.
+TEST(IRSimilarityCandidate, SameBranchStructureOutside) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             %0 = add i32 %a, %b
+                             %1 = add i32 %b, %a
+                             br label %bb1
+                          bb1:
+                             %2 = add i32 %b, %a
+                             %3 = add i32 %a, %b
+                             ret i32 0
+                          }
+                          
+                          define i32 @f2(i32 %a, i32 %b) {
+                          bb0:
+                             %0 = add i32 %a, %b
+                             %1 = add i32 %b, %a
+                             br label %bb1
+                          bb1:
+                             %2 = add i32 %b, %a
+                             %3 = add i32 %a, %b
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableBranches = true;
+  Mapper.initializeForBBs(*M);
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  // Check to make sure that we have a long enough region.
+  ASSERT_EQ(InstrList.size(), static_cast<unsigned>(12));
+  // Check that the instructions were added correctly to both vectors.
+  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+  ASSERT_TRUE(longSimCandCompare(InstrList, true, 3, 0, 6));
+}
+
+// Checks that the same structure is recognized between two candidates, when
+// the branches target other blocks outside region, the relative distance
+// does not need to be the same.
+TEST(IRSimilarityCandidate, DifferentBranchStructureOutside) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             %0 = add i32 %a, %b
+                             %1 = add i32 %b, %a
+                             br label %bb1
+                          bb1:
+                             %2 = add i32 %b, %a
+                             %3 = add i32 %a, %b
+                             ret i32 0
+                          }
+                          
+                          define i32 @f2(i32 %a, i32 %b) {
+                          bb0:
+                             %0 = add i32 %a, %b
+                             %1 = add i32 %b, %a
+                             br label %bb2
+                          bb1:
+                             %2 = add i32 %b, %a
+                             %3 = add i32 %a, %b
+                             br label %bb2
+                          bb2:
+                             %4 = add i32 %b, %a
+                             %5 = add i32 %a, %b
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableBranches = true;
+  Mapper.initializeForBBs(*M);
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  // Check to make sure that we have a long enough region.
+  ASSERT_EQ(InstrList.size(), static_cast<unsigned>(15));
+  // Check that the instructions were added correctly to both vectors.
+  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+  ASSERT_TRUE(longSimCandCompare(InstrList, true, 3, 0, 6));
+}
+
+// Checks that the same structure is recognized between two candidates,
+// when the phi predecessor are other blocks inside the same region,
+// the relative distance between the blocks must be the same.
+TEST(IRSimilarityCandidate, SamePHIStructureInternal) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             br label %bb2
+                          bb1:
+                             br label %bb2
+                          bb2:
+                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb1 ] 
+                             %1 = add i32 %b, %a
+                             %2 = add i32 %a, %b
+                             ret i32 0
+                          }
+                          
+                          define i32 @f2(i32 %a, i32 %b) {
+                          bb0:
+                             br label %bb2
+                          bb1:
+                             br label %bb2
+                          bb2:
+                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb1 ]
+                             %1 = add i32 %b, %a
+                             %2 = add i32 %a, %b
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableBranches = true;
+  Mapper.initializeForBBs(*M);
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  // Check to make sure that we have a long enough region.
+  ASSERT_EQ(InstrList.size(), static_cast<unsigned>(12));
+  // Check that the instructions were added correctly to both vectors.
+  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+  ASSERT_TRUE(longSimCandCompare(InstrList, true, 4, 0, 6));
+}
+
+// Checks that the different structure is recognized between two candidates,
+// when the phi predecessor are other blocks inside the same region,
+// the relative distance between the blocks must be the same.
+TEST(IRSimilarityCandidate, DifferentPHIStructureInternal) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             br label %bb2
+                          bb1:
+                             br label %bb2
+                          bb3:
+                             br label %bb2
+                          bb2:
+                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb1 ] 
+                             %1 = add i32 %b, %a
+                             %2 = add i32 %a, %b
+                             ret i32 0
+                          }
+                          
+                          define i32 @f2(i32 %a, i32 %b) {
+                          bb0:
+                             br label %bb2
+                          bb1:
+                             br label %bb2
+                          bb3:
+                             br label %bb2
+                          bb2:
+                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb3 ] 
+                             %1 = add i32 %b, %a
+                             %2 = add i32 %a, %b
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableBranches = true;
+  Mapper.initializeForBBs(*M);
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  // Check to make sure that we have a long enough region.
+  ASSERT_EQ(InstrList.size(), static_cast<unsigned>(14));
+  // Check that the instructions were added correctly to both vectors.
+  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+  ASSERT_FALSE(longSimCandCompare(InstrList, true, 5, 0, 7));
+}
+
 // Checks that two sets of identical instructions are found to be the same.
 // Both sequences of adds have the same operand ordering, and the same
 // instructions, making them strcturally equivalent.
@@ -2105,6 +2619,62 @@
   }
 }
 
+// This test ensures that when the first instruction in a sequence is
+// a commutative instruction with the same value (mcomm_inst_same_val), but the
+// corresponding instruction (comm_inst_diff_val) is not, we mark the regions
+// and not similar.
+TEST(IRSimilarityIdentifier, CommutativeSameValueFirstMisMatch) {
+  StringRef ModuleString = R"(
+                          define void @v_1_0(i64 %v_33) {
+                            entry:
+                              %comm_inst_same_val = mul i64 undef, undef
+                              %add = add i64 %comm_inst_same_val, %v_33
+                              %comm_inst_diff_val = mul i64 0, undef
+                              %mul.i = add i64 %comm_inst_diff_val, %comm_inst_diff_val
+                              unreachable
+                            })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<std::vector<IRSimilarityCandidate>> SimilarityCandidates;
+  getSimilarities(*M, SimilarityCandidates);
+
+  ASSERT_TRUE(SimilarityCandidates.size() == 0);
+}
+
+// This test makes sure that intrinsic functions that are marked commutative
+// are still treated as non-commutative since they are function calls.
+TEST(IRSimilarityIdentifier, IntrinsicCommutative) {
+  // If treated as commutative, we will fail to find a valid mapping, causing
+  // an assertion error.
+  StringRef ModuleString = R"(
+  define void @foo() {
+    entry:
+      %0 = call i16 @llvm.smul.fix.i16(i16 16384, i16 16384, i32 15)
+      store i16 %0, i16* undef, align 1
+      %1 = icmp eq i16 undef, 8192
+      call void @bar()
+      %2 = call i16 @llvm.smul.fix.i16(i16 -16384, i16 16384, i32 15)
+      store i16 %2, i16* undef, align 1
+      %3 = icmp eq i16 undef, -8192
+      call void @bar()
+      %4 = call i16 @llvm.smul.fix.i16(i16 -16384, i16 -16384, i32 15)
+      ret void
+  }
+
+  declare void @bar()
+
+  ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
+  declare i16 @llvm.smul.fix.i16(i16, i16, i32 immarg))";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<std::vector<IRSimilarityCandidate>> SimilarityCandidates;
+  getSimilarities(*M, SimilarityCandidates);
+
+  ASSERT_TRUE(SimilarityCandidates.size() == 0);
+}
+
 // This test checks to see whether we can detect different structure in
 // commutative instructions.  In this case, the second operand in the second
 // add is different.