CbC/CbC_llvm: clang/lib/Analysis/CloneDetection.cpp annotate

annotate clang/lib/Analysis/CloneDetection.cpp @ 176:de4ac79aef9d

...

author	Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date	Mon, 25 May 2020 17:13:11 +0900
parents	0572611fdcc8
children	c4bab56944e8

rev	line source
150 1d019706d866 LLVM10 anatofuz parents: diff changeset	1 //===--- CloneDetection.cpp - Finds code clones in an AST -------*- C++ -*-===//
1d019706d866 LLVM10 anatofuz parents: diff changeset	2 //
1d019706d866 LLVM10 anatofuz parents: diff changeset	3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
1d019706d866 LLVM10 anatofuz parents: diff changeset	4 // See https://llvm.org/LICENSE.txt for license information.
1d019706d866 LLVM10 anatofuz parents: diff changeset	5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
1d019706d866 LLVM10 anatofuz parents: diff changeset	6 //
1d019706d866 LLVM10 anatofuz parents: diff changeset	7 //===----------------------------------------------------------------------===//
1d019706d866 LLVM10 anatofuz parents: diff changeset	8 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	9 /// This file implements classes for searching and analyzing source code clones.
1d019706d866 LLVM10 anatofuz parents: diff changeset	10 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	11 //===----------------------------------------------------------------------===//
1d019706d866 LLVM10 anatofuz parents: diff changeset	12
1d019706d866 LLVM10 anatofuz parents: diff changeset	13 #include "clang/Analysis/CloneDetection.h"
1d019706d866 LLVM10 anatofuz parents: diff changeset	14 #include "clang/AST/Attr.h"
1d019706d866 LLVM10 anatofuz parents: diff changeset	15 #include "clang/AST/DataCollection.h"
1d019706d866 LLVM10 anatofuz parents: diff changeset	16 #include "clang/AST/DeclTemplate.h"
173 0572611fdcc8 reorgnization done Shinji KONO <kono@ie.u-ryukyu.ac.jp> parents: 150 diff changeset	17 #include "clang/Basic/SourceManager.h"
150 1d019706d866 LLVM10 anatofuz parents: diff changeset	18 #include "llvm/Support/MD5.h"
1d019706d866 LLVM10 anatofuz parents: diff changeset	19 #include "llvm/Support/Path.h"
1d019706d866 LLVM10 anatofuz parents: diff changeset	20
1d019706d866 LLVM10 anatofuz parents: diff changeset	21 using namespace clang;
1d019706d866 LLVM10 anatofuz parents: diff changeset	22
1d019706d866 LLVM10 anatofuz parents: diff changeset	23 StmtSequence::StmtSequence(const CompoundStmt Stmt, const Decl D,
1d019706d866 LLVM10 anatofuz parents: diff changeset	24 unsigned StartIndex, unsigned EndIndex)
1d019706d866 LLVM10 anatofuz parents: diff changeset	25 : S(Stmt), D(D), StartIndex(StartIndex), EndIndex(EndIndex) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	26 assert(Stmt && "Stmt must not be a nullptr");
1d019706d866 LLVM10 anatofuz parents: diff changeset	27 assert(StartIndex < EndIndex && "Given array should not be empty");
1d019706d866 LLVM10 anatofuz parents: diff changeset	28 assert(EndIndex <= Stmt->size() && "Given array too big for this Stmt");
1d019706d866 LLVM10 anatofuz parents: diff changeset	29 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	30
1d019706d866 LLVM10 anatofuz parents: diff changeset	31 StmtSequence::StmtSequence(const Stmt Stmt, const Decl D)
1d019706d866 LLVM10 anatofuz parents: diff changeset	32 : S(Stmt), D(D), StartIndex(0), EndIndex(0) {}
1d019706d866 LLVM10 anatofuz parents: diff changeset	33
1d019706d866 LLVM10 anatofuz parents: diff changeset	34 StmtSequence::StmtSequence()
1d019706d866 LLVM10 anatofuz parents: diff changeset	35 : S(nullptr), D(nullptr), StartIndex(0), EndIndex(0) {}
1d019706d866 LLVM10 anatofuz parents: diff changeset	36
1d019706d866 LLVM10 anatofuz parents: diff changeset	37 bool StmtSequence::contains(const StmtSequence &Other) const {
1d019706d866 LLVM10 anatofuz parents: diff changeset	38 // If both sequences reside in different declarations, they can never contain
1d019706d866 LLVM10 anatofuz parents: diff changeset	39 // each other.
1d019706d866 LLVM10 anatofuz parents: diff changeset	40 if (D != Other.D)
1d019706d866 LLVM10 anatofuz parents: diff changeset	41 return false;
1d019706d866 LLVM10 anatofuz parents: diff changeset	42
1d019706d866 LLVM10 anatofuz parents: diff changeset	43 const SourceManager &SM = getASTContext().getSourceManager();
1d019706d866 LLVM10 anatofuz parents: diff changeset	44
1d019706d866 LLVM10 anatofuz parents: diff changeset	45 // Otherwise check if the start and end locations of the current sequence
1d019706d866 LLVM10 anatofuz parents: diff changeset	46 // surround the other sequence.
1d019706d866 LLVM10 anatofuz parents: diff changeset	47 bool StartIsInBounds =
1d019706d866 LLVM10 anatofuz parents: diff changeset	48 SM.isBeforeInTranslationUnit(getBeginLoc(), Other.getBeginLoc()) \|\|
1d019706d866 LLVM10 anatofuz parents: diff changeset	49 getBeginLoc() == Other.getBeginLoc();
1d019706d866 LLVM10 anatofuz parents: diff changeset	50 if (!StartIsInBounds)
1d019706d866 LLVM10 anatofuz parents: diff changeset	51 return false;
1d019706d866 LLVM10 anatofuz parents: diff changeset	52
1d019706d866 LLVM10 anatofuz parents: diff changeset	53 bool EndIsInBounds =
1d019706d866 LLVM10 anatofuz parents: diff changeset	54 SM.isBeforeInTranslationUnit(Other.getEndLoc(), getEndLoc()) \|\|
1d019706d866 LLVM10 anatofuz parents: diff changeset	55 Other.getEndLoc() == getEndLoc();
1d019706d866 LLVM10 anatofuz parents: diff changeset	56 return EndIsInBounds;
1d019706d866 LLVM10 anatofuz parents: diff changeset	57 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	58
1d019706d866 LLVM10 anatofuz parents: diff changeset	59 StmtSequence::iterator StmtSequence::begin() const {
1d019706d866 LLVM10 anatofuz parents: diff changeset	60 if (!holdsSequence()) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	61 return &S;
1d019706d866 LLVM10 anatofuz parents: diff changeset	62 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	63 auto CS = cast<CompoundStmt>(S);
1d019706d866 LLVM10 anatofuz parents: diff changeset	64 return CS->body_begin() + StartIndex;
1d019706d866 LLVM10 anatofuz parents: diff changeset	65 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	66
1d019706d866 LLVM10 anatofuz parents: diff changeset	67 StmtSequence::iterator StmtSequence::end() const {
1d019706d866 LLVM10 anatofuz parents: diff changeset	68 if (!holdsSequence()) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	69 return reinterpret_cast<StmtSequence::iterator>(&S) + 1;
1d019706d866 LLVM10 anatofuz parents: diff changeset	70 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	71 auto CS = cast<CompoundStmt>(S);
1d019706d866 LLVM10 anatofuz parents: diff changeset	72 return CS->body_begin() + EndIndex;
1d019706d866 LLVM10 anatofuz parents: diff changeset	73 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	74
1d019706d866 LLVM10 anatofuz parents: diff changeset	75 ASTContext &StmtSequence::getASTContext() const {
1d019706d866 LLVM10 anatofuz parents: diff changeset	76 assert(D);
1d019706d866 LLVM10 anatofuz parents: diff changeset	77 return D->getASTContext();
1d019706d866 LLVM10 anatofuz parents: diff changeset	78 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	79
1d019706d866 LLVM10 anatofuz parents: diff changeset	80 SourceLocation StmtSequence::getBeginLoc() const {
1d019706d866 LLVM10 anatofuz parents: diff changeset	81 return front()->getBeginLoc();
1d019706d866 LLVM10 anatofuz parents: diff changeset	82 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	83
1d019706d866 LLVM10 anatofuz parents: diff changeset	84 SourceLocation StmtSequence::getEndLoc() const { return back()->getEndLoc(); }
1d019706d866 LLVM10 anatofuz parents: diff changeset	85
1d019706d866 LLVM10 anatofuz parents: diff changeset	86 SourceRange StmtSequence::getSourceRange() const {
1d019706d866 LLVM10 anatofuz parents: diff changeset	87 return SourceRange(getBeginLoc(), getEndLoc());
1d019706d866 LLVM10 anatofuz parents: diff changeset	88 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	89
1d019706d866 LLVM10 anatofuz parents: diff changeset	90 void CloneDetector::analyzeCodeBody(const Decl *D) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	91 assert(D);
1d019706d866 LLVM10 anatofuz parents: diff changeset	92 assert(D->hasBody());
1d019706d866 LLVM10 anatofuz parents: diff changeset	93
1d019706d866 LLVM10 anatofuz parents: diff changeset	94 Sequences.push_back(StmtSequence(D->getBody(), D));
1d019706d866 LLVM10 anatofuz parents: diff changeset	95 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	96
1d019706d866 LLVM10 anatofuz parents: diff changeset	97 /// Returns true if and only if \p Stmt contains at least one other
1d019706d866 LLVM10 anatofuz parents: diff changeset	98 /// sequence in the \p Group.
1d019706d866 LLVM10 anatofuz parents: diff changeset	99 static bool containsAnyInGroup(StmtSequence &Seq,
1d019706d866 LLVM10 anatofuz parents: diff changeset	100 CloneDetector::CloneGroup &Group) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	101 for (StmtSequence &GroupSeq : Group) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	102 if (Seq.contains(GroupSeq))
1d019706d866 LLVM10 anatofuz parents: diff changeset	103 return true;
1d019706d866 LLVM10 anatofuz parents: diff changeset	104 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	105 return false;
1d019706d866 LLVM10 anatofuz parents: diff changeset	106 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	107
1d019706d866 LLVM10 anatofuz parents: diff changeset	108 /// Returns true if and only if all sequences in \p OtherGroup are
1d019706d866 LLVM10 anatofuz parents: diff changeset	109 /// contained by a sequence in \p Group.
1d019706d866 LLVM10 anatofuz parents: diff changeset	110 static bool containsGroup(CloneDetector::CloneGroup &Group,
1d019706d866 LLVM10 anatofuz parents: diff changeset	111 CloneDetector::CloneGroup &OtherGroup) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	112 // We have less sequences in the current group than we have in the other,
1d019706d866 LLVM10 anatofuz parents: diff changeset	113 // so we will never fulfill the requirement for returning true. This is only
1d019706d866 LLVM10 anatofuz parents: diff changeset	114 // possible because we know that a sequence in Group can contain at most
1d019706d866 LLVM10 anatofuz parents: diff changeset	115 // one sequence in OtherGroup.
1d019706d866 LLVM10 anatofuz parents: diff changeset	116 if (Group.size() < OtherGroup.size())
1d019706d866 LLVM10 anatofuz parents: diff changeset	117 return false;
1d019706d866 LLVM10 anatofuz parents: diff changeset	118
1d019706d866 LLVM10 anatofuz parents: diff changeset	119 for (StmtSequence &Stmt : Group) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	120 if (!containsAnyInGroup(Stmt, OtherGroup))
1d019706d866 LLVM10 anatofuz parents: diff changeset	121 return false;
1d019706d866 LLVM10 anatofuz parents: diff changeset	122 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	123 return true;
1d019706d866 LLVM10 anatofuz parents: diff changeset	124 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	125
1d019706d866 LLVM10 anatofuz parents: diff changeset	126 void OnlyLargestCloneConstraint::constrain(
1d019706d866 LLVM10 anatofuz parents: diff changeset	127 std::vector<CloneDetector::CloneGroup> &Result) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	128 std::vector<unsigned> IndexesToRemove;
1d019706d866 LLVM10 anatofuz parents: diff changeset	129
1d019706d866 LLVM10 anatofuz parents: diff changeset	130 // Compare every group in the result with the rest. If one groups contains
1d019706d866 LLVM10 anatofuz parents: diff changeset	131 // another group, we only need to return the bigger group.
1d019706d866 LLVM10 anatofuz parents: diff changeset	132 // Note: This doesn't scale well, so if possible avoid calling any heavy
1d019706d866 LLVM10 anatofuz parents: diff changeset	133 // function from this loop to minimize the performance impact.
1d019706d866 LLVM10 anatofuz parents: diff changeset	134 for (unsigned i = 0; i < Result.size(); ++i) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	135 for (unsigned j = 0; j < Result.size(); ++j) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	136 // Don't compare a group with itself.
1d019706d866 LLVM10 anatofuz parents: diff changeset	137 if (i == j)
1d019706d866 LLVM10 anatofuz parents: diff changeset	138 continue;
1d019706d866 LLVM10 anatofuz parents: diff changeset	139
1d019706d866 LLVM10 anatofuz parents: diff changeset	140 if (containsGroup(Result[j], Result[i])) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	141 IndexesToRemove.push_back(i);
1d019706d866 LLVM10 anatofuz parents: diff changeset	142 break;
1d019706d866 LLVM10 anatofuz parents: diff changeset	143 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	144 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	145 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	146
1d019706d866 LLVM10 anatofuz parents: diff changeset	147 // Erasing a list of indexes from the vector should be done with decreasing
1d019706d866 LLVM10 anatofuz parents: diff changeset	148 // indexes. As IndexesToRemove is constructed with increasing values, we just
1d019706d866 LLVM10 anatofuz parents: diff changeset	149 // reverse iterate over it to get the desired order.
1d019706d866 LLVM10 anatofuz parents: diff changeset	150 for (auto I = IndexesToRemove.rbegin(); I != IndexesToRemove.rend(); ++I) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	151 Result.erase(Result.begin() + *I);
1d019706d866 LLVM10 anatofuz parents: diff changeset	152 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	153 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	154
1d019706d866 LLVM10 anatofuz parents: diff changeset	155 bool FilenamePatternConstraint::isAutoGenerated(
1d019706d866 LLVM10 anatofuz parents: diff changeset	156 const CloneDetector::CloneGroup &Group) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	157 if (IgnoredFilesPattern.empty() \|\| Group.empty() \|\|
1d019706d866 LLVM10 anatofuz parents: diff changeset	158 !IgnoredFilesRegex->isValid())
1d019706d866 LLVM10 anatofuz parents: diff changeset	159 return false;
1d019706d866 LLVM10 anatofuz parents: diff changeset	160
1d019706d866 LLVM10 anatofuz parents: diff changeset	161 for (const StmtSequence &S : Group) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	162 const SourceManager &SM = S.getASTContext().getSourceManager();
1d019706d866 LLVM10 anatofuz parents: diff changeset	163 StringRef Filename = llvm::sys::path::filename(
1d019706d866 LLVM10 anatofuz parents: diff changeset	164 SM.getFilename(S.getContainingDecl()->getLocation()));
1d019706d866 LLVM10 anatofuz parents: diff changeset	165 if (IgnoredFilesRegex->match(Filename))
1d019706d866 LLVM10 anatofuz parents: diff changeset	166 return true;
1d019706d866 LLVM10 anatofuz parents: diff changeset	167 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	168
1d019706d866 LLVM10 anatofuz parents: diff changeset	169 return false;
1d019706d866 LLVM10 anatofuz parents: diff changeset	170 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	171
1d019706d866 LLVM10 anatofuz parents: diff changeset	172 /// This class defines what a type II code clone is: If it collects for two
1d019706d866 LLVM10 anatofuz parents: diff changeset	173 /// statements the same data, then those two statements are considered to be
1d019706d866 LLVM10 anatofuz parents: diff changeset	174 /// clones of each other.
1d019706d866 LLVM10 anatofuz parents: diff changeset	175 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	176 /// All collected data is forwarded to the given data consumer of the type T.
1d019706d866 LLVM10 anatofuz parents: diff changeset	177 /// The data consumer class needs to provide a member method with the signature:
1d019706d866 LLVM10 anatofuz parents: diff changeset	178 /// update(StringRef Str)
1d019706d866 LLVM10 anatofuz parents: diff changeset	179 namespace {
1d019706d866 LLVM10 anatofuz parents: diff changeset	180 template <class T>
1d019706d866 LLVM10 anatofuz parents: diff changeset	181 class CloneTypeIIStmtDataCollector
1d019706d866 LLVM10 anatofuz parents: diff changeset	182 : public ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>> {
1d019706d866 LLVM10 anatofuz parents: diff changeset	183 ASTContext &Context;
1d019706d866 LLVM10 anatofuz parents: diff changeset	184 /// The data sink to which all data is forwarded.
1d019706d866 LLVM10 anatofuz parents: diff changeset	185 T &DataConsumer;
1d019706d866 LLVM10 anatofuz parents: diff changeset	186
1d019706d866 LLVM10 anatofuz parents: diff changeset	187 template <class Ty> void addData(const Ty &Data) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	188 data_collection::addDataToConsumer(DataConsumer, Data);
1d019706d866 LLVM10 anatofuz parents: diff changeset	189 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	190
1d019706d866 LLVM10 anatofuz parents: diff changeset	191 public:
1d019706d866 LLVM10 anatofuz parents: diff changeset	192 CloneTypeIIStmtDataCollector(const Stmt *S, ASTContext &Context,
1d019706d866 LLVM10 anatofuz parents: diff changeset	193 T &DataConsumer)
1d019706d866 LLVM10 anatofuz parents: diff changeset	194 : Context(Context), DataConsumer(DataConsumer) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	195 this->Visit(S);
1d019706d866 LLVM10 anatofuz parents: diff changeset	196 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	197
1d019706d866 LLVM10 anatofuz parents: diff changeset	198 // Define a visit method for each class to collect data and subsequently visit
1d019706d866 LLVM10 anatofuz parents: diff changeset	199 // all parent classes. This uses a template so that custom visit methods by us
1d019706d866 LLVM10 anatofuz parents: diff changeset	200 // take precedence.
1d019706d866 LLVM10 anatofuz parents: diff changeset	201 #define DEF_ADD_DATA(CLASS, CODE) \
1d019706d866 LLVM10 anatofuz parents: diff changeset	202 template <class = void> void Visit##CLASS(const CLASS *S) { \
1d019706d866 LLVM10 anatofuz parents: diff changeset	203 CODE; \
1d019706d866 LLVM10 anatofuz parents: diff changeset	204 ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>>::Visit##CLASS(S); \
1d019706d866 LLVM10 anatofuz parents: diff changeset	205 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	206
1d019706d866 LLVM10 anatofuz parents: diff changeset	207 #include "clang/AST/StmtDataCollectors.inc"
1d019706d866 LLVM10 anatofuz parents: diff changeset	208
1d019706d866 LLVM10 anatofuz parents: diff changeset	209 // Type II clones ignore variable names and literals, so let's skip them.
1d019706d866 LLVM10 anatofuz parents: diff changeset	210 #define SKIP(CLASS) \
1d019706d866 LLVM10 anatofuz parents: diff changeset	211 void Visit##CLASS(const CLASS *S) { \
1d019706d866 LLVM10 anatofuz parents: diff changeset	212 ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>>::Visit##CLASS(S); \
1d019706d866 LLVM10 anatofuz parents: diff changeset	213 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	214 SKIP(DeclRefExpr)
1d019706d866 LLVM10 anatofuz parents: diff changeset	215 SKIP(MemberExpr)
1d019706d866 LLVM10 anatofuz parents: diff changeset	216 SKIP(IntegerLiteral)
1d019706d866 LLVM10 anatofuz parents: diff changeset	217 SKIP(FloatingLiteral)
1d019706d866 LLVM10 anatofuz parents: diff changeset	218 SKIP(StringLiteral)
1d019706d866 LLVM10 anatofuz parents: diff changeset	219 SKIP(CXXBoolLiteralExpr)
1d019706d866 LLVM10 anatofuz parents: diff changeset	220 SKIP(CharacterLiteral)
1d019706d866 LLVM10 anatofuz parents: diff changeset	221 #undef SKIP
1d019706d866 LLVM10 anatofuz parents: diff changeset	222 };
1d019706d866 LLVM10 anatofuz parents: diff changeset	223 } // end anonymous namespace
1d019706d866 LLVM10 anatofuz parents: diff changeset	224
1d019706d866 LLVM10 anatofuz parents: diff changeset	225 static size_t createHash(llvm::MD5 &Hash) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	226 size_t HashCode;
1d019706d866 LLVM10 anatofuz parents: diff changeset	227
1d019706d866 LLVM10 anatofuz parents: diff changeset	228 // Create the final hash code for the current Stmt.
1d019706d866 LLVM10 anatofuz parents: diff changeset	229 llvm::MD5::MD5Result HashResult;
1d019706d866 LLVM10 anatofuz parents: diff changeset	230 Hash.final(HashResult);
1d019706d866 LLVM10 anatofuz parents: diff changeset	231
1d019706d866 LLVM10 anatofuz parents: diff changeset	232 // Copy as much as possible of the generated hash code to the Stmt's hash
1d019706d866 LLVM10 anatofuz parents: diff changeset	233 // code.
1d019706d866 LLVM10 anatofuz parents: diff changeset	234 std::memcpy(&HashCode, &HashResult,
1d019706d866 LLVM10 anatofuz parents: diff changeset	235 std::min(sizeof(HashCode), sizeof(HashResult)));
1d019706d866 LLVM10 anatofuz parents: diff changeset	236
1d019706d866 LLVM10 anatofuz parents: diff changeset	237 return HashCode;
1d019706d866 LLVM10 anatofuz parents: diff changeset	238 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	239
1d019706d866 LLVM10 anatofuz parents: diff changeset	240 /// Generates and saves a hash code for the given Stmt.
1d019706d866 LLVM10 anatofuz parents: diff changeset	241 /// \param S The given Stmt.
1d019706d866 LLVM10 anatofuz parents: diff changeset	242 /// \param D The Decl containing S.
1d019706d866 LLVM10 anatofuz parents: diff changeset	243 /// \param StmtsByHash Output parameter that will contain the hash codes for
1d019706d866 LLVM10 anatofuz parents: diff changeset	244 /// each StmtSequence in the given Stmt.
1d019706d866 LLVM10 anatofuz parents: diff changeset	245 /// \return The hash code of the given Stmt.
1d019706d866 LLVM10 anatofuz parents: diff changeset	246 ///
1d019706d866 LLVM10 anatofuz parents: diff changeset	247 /// If the given Stmt is a CompoundStmt, this method will also generate
1d019706d866 LLVM10 anatofuz parents: diff changeset	248 /// hashes for all possible StmtSequences in the children of this Stmt.
1d019706d866 LLVM10 anatofuz parents: diff changeset	249 static size_t
1d019706d866 LLVM10 anatofuz parents: diff changeset	250 saveHash(const Stmt S, const Decl D,
1d019706d866 LLVM10 anatofuz parents: diff changeset	251 std::vector<std::pair<size_t, StmtSequence>> &StmtsByHash) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	252 llvm::MD5 Hash;
1d019706d866 LLVM10 anatofuz parents: diff changeset	253 ASTContext &Context = D->getASTContext();
1d019706d866 LLVM10 anatofuz parents: diff changeset	254
1d019706d866 LLVM10 anatofuz parents: diff changeset	255 CloneTypeIIStmtDataCollector<llvm::MD5>(S, Context, Hash);
1d019706d866 LLVM10 anatofuz parents: diff changeset	256
1d019706d866 LLVM10 anatofuz parents: diff changeset	257 auto CS = dyn_cast<CompoundStmt>(S);
1d019706d866 LLVM10 anatofuz parents: diff changeset	258 SmallVector<size_t, 8> ChildHashes;
1d019706d866 LLVM10 anatofuz parents: diff changeset	259
1d019706d866 LLVM10 anatofuz parents: diff changeset	260 for (const Stmt *Child : S->children()) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	261 if (Child == nullptr) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	262 ChildHashes.push_back(0);
1d019706d866 LLVM10 anatofuz parents: diff changeset	263 continue;
1d019706d866 LLVM10 anatofuz parents: diff changeset	264 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	265 size_t ChildHash = saveHash(Child, D, StmtsByHash);
1d019706d866 LLVM10 anatofuz parents: diff changeset	266 Hash.update(
1d019706d866 LLVM10 anatofuz parents: diff changeset	267 StringRef(reinterpret_cast<char *>(&ChildHash), sizeof(ChildHash)));
1d019706d866 LLVM10 anatofuz parents: diff changeset	268 ChildHashes.push_back(ChildHash);
1d019706d866 LLVM10 anatofuz parents: diff changeset	269 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	270
1d019706d866 LLVM10 anatofuz parents: diff changeset	271 if (CS) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	272 // If we're in a CompoundStmt, we hash all possible combinations of child
1d019706d866 LLVM10 anatofuz parents: diff changeset	273 // statements to find clones in those subsequences.
1d019706d866 LLVM10 anatofuz parents: diff changeset	274 // We first go through every possible starting position of a subsequence.
1d019706d866 LLVM10 anatofuz parents: diff changeset	275 for (unsigned Pos = 0; Pos < CS->size(); ++Pos) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	276 // Then we try all possible lengths this subsequence could have and
1d019706d866 LLVM10 anatofuz parents: diff changeset	277 // reuse the same hash object to make sure we only hash every child
1d019706d866 LLVM10 anatofuz parents: diff changeset	278 // hash exactly once.
1d019706d866 LLVM10 anatofuz parents: diff changeset	279 llvm::MD5 Hash;
1d019706d866 LLVM10 anatofuz parents: diff changeset	280 for (unsigned Length = 1; Length <= CS->size() - Pos; ++Length) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	281 // Grab the current child hash and put it into our hash. We do
1d019706d866 LLVM10 anatofuz parents: diff changeset	282 // -1 on the index because we start counting the length at 1.
1d019706d866 LLVM10 anatofuz parents: diff changeset	283 size_t ChildHash = ChildHashes[Pos + Length - 1];
1d019706d866 LLVM10 anatofuz parents: diff changeset	284 Hash.update(
1d019706d866 LLVM10 anatofuz parents: diff changeset	285 StringRef(reinterpret_cast<char *>(&ChildHash), sizeof(ChildHash)));
1d019706d866 LLVM10 anatofuz parents: diff changeset	286 // If we have at least two elements in our subsequence, we can start
1d019706d866 LLVM10 anatofuz parents: diff changeset	287 // saving it.
1d019706d866 LLVM10 anatofuz parents: diff changeset	288 if (Length > 1) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	289 llvm::MD5 SubHash = Hash;
1d019706d866 LLVM10 anatofuz parents: diff changeset	290 StmtsByHash.push_back(std::make_pair(
1d019706d866 LLVM10 anatofuz parents: diff changeset	291 createHash(SubHash), StmtSequence(CS, D, Pos, Pos + Length)));
1d019706d866 LLVM10 anatofuz parents: diff changeset	292 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	293 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	294 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	295 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	296
1d019706d866 LLVM10 anatofuz parents: diff changeset	297 size_t HashCode = createHash(Hash);
1d019706d866 LLVM10 anatofuz parents: diff changeset	298 StmtsByHash.push_back(std::make_pair(HashCode, StmtSequence(S, D)));
1d019706d866 LLVM10 anatofuz parents: diff changeset	299 return HashCode;
1d019706d866 LLVM10 anatofuz parents: diff changeset	300 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	301
1d019706d866 LLVM10 anatofuz parents: diff changeset	302 namespace {
1d019706d866 LLVM10 anatofuz parents: diff changeset	303 /// Wrapper around FoldingSetNodeID that it can be used as the template
1d019706d866 LLVM10 anatofuz parents: diff changeset	304 /// argument of the StmtDataCollector.
1d019706d866 LLVM10 anatofuz parents: diff changeset	305 class FoldingSetNodeIDWrapper {
1d019706d866 LLVM10 anatofuz parents: diff changeset	306
1d019706d866 LLVM10 anatofuz parents: diff changeset	307 llvm::FoldingSetNodeID &FS;
1d019706d866 LLVM10 anatofuz parents: diff changeset	308
1d019706d866 LLVM10 anatofuz parents: diff changeset	309 public:
1d019706d866 LLVM10 anatofuz parents: diff changeset	310 FoldingSetNodeIDWrapper(llvm::FoldingSetNodeID &FS) : FS(FS) {}
1d019706d866 LLVM10 anatofuz parents: diff changeset	311
1d019706d866 LLVM10 anatofuz parents: diff changeset	312 void update(StringRef Str) { FS.AddString(Str); }
1d019706d866 LLVM10 anatofuz parents: diff changeset	313 };
1d019706d866 LLVM10 anatofuz parents: diff changeset	314 } // end anonymous namespace
1d019706d866 LLVM10 anatofuz parents: diff changeset	315
1d019706d866 LLVM10 anatofuz parents: diff changeset	316 /// Writes the relevant data from all statements and child statements
1d019706d866 LLVM10 anatofuz parents: diff changeset	317 /// in the given StmtSequence into the given FoldingSetNodeID.
1d019706d866 LLVM10 anatofuz parents: diff changeset	318 static void CollectStmtSequenceData(const StmtSequence &Sequence,
1d019706d866 LLVM10 anatofuz parents: diff changeset	319 FoldingSetNodeIDWrapper &OutputData) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	320 for (const Stmt *S : Sequence) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	321 CloneTypeIIStmtDataCollector<FoldingSetNodeIDWrapper>(
1d019706d866 LLVM10 anatofuz parents: diff changeset	322 S, Sequence.getASTContext(), OutputData);
1d019706d866 LLVM10 anatofuz parents: diff changeset	323
1d019706d866 LLVM10 anatofuz parents: diff changeset	324 for (const Stmt *Child : S->children()) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	325 if (!Child)
1d019706d866 LLVM10 anatofuz parents: diff changeset	326 continue;
1d019706d866 LLVM10 anatofuz parents: diff changeset	327
1d019706d866 LLVM10 anatofuz parents: diff changeset	328 CollectStmtSequenceData(StmtSequence(Child, Sequence.getContainingDecl()),
1d019706d866 LLVM10 anatofuz parents: diff changeset	329 OutputData);
1d019706d866 LLVM10 anatofuz parents: diff changeset	330 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	331 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	332 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	333
1d019706d866 LLVM10 anatofuz parents: diff changeset	334 /// Returns true if both sequences are clones of each other.
1d019706d866 LLVM10 anatofuz parents: diff changeset	335 static bool areSequencesClones(const StmtSequence &LHS,
1d019706d866 LLVM10 anatofuz parents: diff changeset	336 const StmtSequence &RHS) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	337 // We collect the data from all statements in the sequence as we did before
1d019706d866 LLVM10 anatofuz parents: diff changeset	338 // when generating a hash value for each sequence. But this time we don't
1d019706d866 LLVM10 anatofuz parents: diff changeset	339 // hash the collected data and compare the whole data set instead. This
1d019706d866 LLVM10 anatofuz parents: diff changeset	340 // prevents any false-positives due to hash code collisions.
1d019706d866 LLVM10 anatofuz parents: diff changeset	341 llvm::FoldingSetNodeID DataLHS, DataRHS;
1d019706d866 LLVM10 anatofuz parents: diff changeset	342 FoldingSetNodeIDWrapper LHSWrapper(DataLHS);
1d019706d866 LLVM10 anatofuz parents: diff changeset	343 FoldingSetNodeIDWrapper RHSWrapper(DataRHS);
1d019706d866 LLVM10 anatofuz parents: diff changeset	344
1d019706d866 LLVM10 anatofuz parents: diff changeset	345 CollectStmtSequenceData(LHS, LHSWrapper);
1d019706d866 LLVM10 anatofuz parents: diff changeset	346 CollectStmtSequenceData(RHS, RHSWrapper);
1d019706d866 LLVM10 anatofuz parents: diff changeset	347
1d019706d866 LLVM10 anatofuz parents: diff changeset	348 return DataLHS == DataRHS;
1d019706d866 LLVM10 anatofuz parents: diff changeset	349 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	350
1d019706d866 LLVM10 anatofuz parents: diff changeset	351 void RecursiveCloneTypeIIHashConstraint::constrain(
1d019706d866 LLVM10 anatofuz parents: diff changeset	352 std::vector<CloneDetector::CloneGroup> &Sequences) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	353 // FIXME: Maybe we can do this in-place and don't need this additional vector.
1d019706d866 LLVM10 anatofuz parents: diff changeset	354 std::vector<CloneDetector::CloneGroup> Result;
1d019706d866 LLVM10 anatofuz parents: diff changeset	355
1d019706d866 LLVM10 anatofuz parents: diff changeset	356 for (CloneDetector::CloneGroup &Group : Sequences) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	357 // We assume in the following code that the Group is non-empty, so we
1d019706d866 LLVM10 anatofuz parents: diff changeset	358 // skip all empty groups.
1d019706d866 LLVM10 anatofuz parents: diff changeset	359 if (Group.empty())
1d019706d866 LLVM10 anatofuz parents: diff changeset	360 continue;
1d019706d866 LLVM10 anatofuz parents: diff changeset	361
1d019706d866 LLVM10 anatofuz parents: diff changeset	362 std::vector<std::pair<size_t, StmtSequence>> StmtsByHash;
1d019706d866 LLVM10 anatofuz parents: diff changeset	363
1d019706d866 LLVM10 anatofuz parents: diff changeset	364 // Generate hash codes for all children of S and save them in StmtsByHash.
1d019706d866 LLVM10 anatofuz parents: diff changeset	365 for (const StmtSequence &S : Group) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	366 saveHash(S.front(), S.getContainingDecl(), StmtsByHash);
1d019706d866 LLVM10 anatofuz parents: diff changeset	367 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	368
1d019706d866 LLVM10 anatofuz parents: diff changeset	369 // Sort hash_codes in StmtsByHash.
1d019706d866 LLVM10 anatofuz parents: diff changeset	370 llvm::stable_sort(StmtsByHash, llvm::less_first());
1d019706d866 LLVM10 anatofuz parents: diff changeset	371
1d019706d866 LLVM10 anatofuz parents: diff changeset	372 // Check for each StmtSequence if its successor has the same hash value.
1d019706d866 LLVM10 anatofuz parents: diff changeset	373 // We don't check the last StmtSequence as it has no successor.
1d019706d866 LLVM10 anatofuz parents: diff changeset	374 // Note: The 'size - 1 ' in the condition is safe because we check for an
1d019706d866 LLVM10 anatofuz parents: diff changeset	375 // empty Group vector at the beginning of this function.
1d019706d866 LLVM10 anatofuz parents: diff changeset	376 for (unsigned i = 0; i < StmtsByHash.size() - 1; ++i) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	377 const auto Current = StmtsByHash[i];
1d019706d866 LLVM10 anatofuz parents: diff changeset	378
1d019706d866 LLVM10 anatofuz parents: diff changeset	379 // It's likely that we just found a sequence of StmtSequences that
1d019706d866 LLVM10 anatofuz parents: diff changeset	380 // represent a CloneGroup, so we create a new group and start checking and
1d019706d866 LLVM10 anatofuz parents: diff changeset	381 // adding the StmtSequences in this sequence.
1d019706d866 LLVM10 anatofuz parents: diff changeset	382 CloneDetector::CloneGroup NewGroup;
1d019706d866 LLVM10 anatofuz parents: diff changeset	383
1d019706d866 LLVM10 anatofuz parents: diff changeset	384 size_t PrototypeHash = Current.first;
1d019706d866 LLVM10 anatofuz parents: diff changeset	385
1d019706d866 LLVM10 anatofuz parents: diff changeset	386 for (; i < StmtsByHash.size(); ++i) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	387 // A different hash value means we have reached the end of the sequence.
1d019706d866 LLVM10 anatofuz parents: diff changeset	388 if (PrototypeHash != StmtsByHash[i].first) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	389 // The current sequence could be the start of a new CloneGroup. So we
1d019706d866 LLVM10 anatofuz parents: diff changeset	390 // decrement i so that we visit it again in the outer loop.
1d019706d866 LLVM10 anatofuz parents: diff changeset	391 // Note: i can never be 0 at this point because we are just comparing
1d019706d866 LLVM10 anatofuz parents: diff changeset	392 // the hash of the Current StmtSequence with itself in the 'if' above.
1d019706d866 LLVM10 anatofuz parents: diff changeset	393 assert(i != 0);
1d019706d866 LLVM10 anatofuz parents: diff changeset	394 --i;
1d019706d866 LLVM10 anatofuz parents: diff changeset	395 break;
1d019706d866 LLVM10 anatofuz parents: diff changeset	396 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	397 // Same hash value means we should add the StmtSequence to the current
1d019706d866 LLVM10 anatofuz parents: diff changeset	398 // group.
1d019706d866 LLVM10 anatofuz parents: diff changeset	399 NewGroup.push_back(StmtsByHash[i].second);
1d019706d866 LLVM10 anatofuz parents: diff changeset	400 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	401
1d019706d866 LLVM10 anatofuz parents: diff changeset	402 // We created a new clone group with matching hash codes and move it to
1d019706d866 LLVM10 anatofuz parents: diff changeset	403 // the result vector.
1d019706d866 LLVM10 anatofuz parents: diff changeset	404 Result.push_back(NewGroup);
1d019706d866 LLVM10 anatofuz parents: diff changeset	405 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	406 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	407 // Sequences is the output parameter, so we copy our result into it.
1d019706d866 LLVM10 anatofuz parents: diff changeset	408 Sequences = Result;
1d019706d866 LLVM10 anatofuz parents: diff changeset	409 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	410
1d019706d866 LLVM10 anatofuz parents: diff changeset	411 void RecursiveCloneTypeIIVerifyConstraint::constrain(
1d019706d866 LLVM10 anatofuz parents: diff changeset	412 std::vector<CloneDetector::CloneGroup> &Sequences) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	413 CloneConstraint::splitCloneGroups(
1d019706d866 LLVM10 anatofuz parents: diff changeset	414 Sequences, [](const StmtSequence &A, const StmtSequence &B) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	415 return areSequencesClones(A, B);
1d019706d866 LLVM10 anatofuz parents: diff changeset	416 });
1d019706d866 LLVM10 anatofuz parents: diff changeset	417 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	418
1d019706d866 LLVM10 anatofuz parents: diff changeset	419 size_t MinComplexityConstraint::calculateStmtComplexity(
1d019706d866 LLVM10 anatofuz parents: diff changeset	420 const StmtSequence &Seq, std::size_t Limit,
1d019706d866 LLVM10 anatofuz parents: diff changeset	421 const std::string &ParentMacroStack) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	422 if (Seq.empty())
1d019706d866 LLVM10 anatofuz parents: diff changeset	423 return 0;
1d019706d866 LLVM10 anatofuz parents: diff changeset	424
1d019706d866 LLVM10 anatofuz parents: diff changeset	425 size_t Complexity = 1;
1d019706d866 LLVM10 anatofuz parents: diff changeset	426
1d019706d866 LLVM10 anatofuz parents: diff changeset	427 ASTContext &Context = Seq.getASTContext();
1d019706d866 LLVM10 anatofuz parents: diff changeset	428
1d019706d866 LLVM10 anatofuz parents: diff changeset	429 // Look up what macros expanded into the current statement.
1d019706d866 LLVM10 anatofuz parents: diff changeset	430 std::string MacroStack =
1d019706d866 LLVM10 anatofuz parents: diff changeset	431 data_collection::getMacroStack(Seq.getBeginLoc(), Context);
1d019706d866 LLVM10 anatofuz parents: diff changeset	432
1d019706d866 LLVM10 anatofuz parents: diff changeset	433 // First, check if ParentMacroStack is not empty which means we are currently
1d019706d866 LLVM10 anatofuz parents: diff changeset	434 // dealing with a parent statement which was expanded from a macro.
1d019706d866 LLVM10 anatofuz parents: diff changeset	435 // If this parent statement was expanded from the same macros as this
1d019706d866 LLVM10 anatofuz parents: diff changeset	436 // statement, we reduce the initial complexity of this statement to zero.
1d019706d866 LLVM10 anatofuz parents: diff changeset	437 // This causes that a group of statements that were generated by a single
1d019706d866 LLVM10 anatofuz parents: diff changeset	438 // macro expansion will only increase the total complexity by one.
1d019706d866 LLVM10 anatofuz parents: diff changeset	439 // Note: This is not the final complexity of this statement as we still
1d019706d866 LLVM10 anatofuz parents: diff changeset	440 // add the complexity of the child statements to the complexity value.
1d019706d866 LLVM10 anatofuz parents: diff changeset	441 if (!ParentMacroStack.empty() && MacroStack == ParentMacroStack) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	442 Complexity = 0;
1d019706d866 LLVM10 anatofuz parents: diff changeset	443 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	444
1d019706d866 LLVM10 anatofuz parents: diff changeset	445 // Iterate over the Stmts in the StmtSequence and add their complexity values
1d019706d866 LLVM10 anatofuz parents: diff changeset	446 // to the current complexity value.
1d019706d866 LLVM10 anatofuz parents: diff changeset	447 if (Seq.holdsSequence()) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	448 for (const Stmt *S : Seq) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	449 Complexity += calculateStmtComplexity(
1d019706d866 LLVM10 anatofuz parents: diff changeset	450 StmtSequence(S, Seq.getContainingDecl()), Limit, MacroStack);
1d019706d866 LLVM10 anatofuz parents: diff changeset	451 if (Complexity >= Limit)
1d019706d866 LLVM10 anatofuz parents: diff changeset	452 return Limit;
1d019706d866 LLVM10 anatofuz parents: diff changeset	453 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	454 } else {
1d019706d866 LLVM10 anatofuz parents: diff changeset	455 for (const Stmt *S : Seq.front()->children()) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	456 Complexity += calculateStmtComplexity(
1d019706d866 LLVM10 anatofuz parents: diff changeset	457 StmtSequence(S, Seq.getContainingDecl()), Limit, MacroStack);
1d019706d866 LLVM10 anatofuz parents: diff changeset	458 if (Complexity >= Limit)
1d019706d866 LLVM10 anatofuz parents: diff changeset	459 return Limit;
1d019706d866 LLVM10 anatofuz parents: diff changeset	460 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	461 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	462 return Complexity;
1d019706d866 LLVM10 anatofuz parents: diff changeset	463 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	464
1d019706d866 LLVM10 anatofuz parents: diff changeset	465 void MatchingVariablePatternConstraint::constrain(
1d019706d866 LLVM10 anatofuz parents: diff changeset	466 std::vector<CloneDetector::CloneGroup> &CloneGroups) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	467 CloneConstraint::splitCloneGroups(
1d019706d866 LLVM10 anatofuz parents: diff changeset	468 CloneGroups, [](const StmtSequence &A, const StmtSequence &B) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	469 VariablePattern PatternA(A);
1d019706d866 LLVM10 anatofuz parents: diff changeset	470 VariablePattern PatternB(B);
1d019706d866 LLVM10 anatofuz parents: diff changeset	471 return PatternA.countPatternDifferences(PatternB) == 0;
1d019706d866 LLVM10 anatofuz parents: diff changeset	472 });
1d019706d866 LLVM10 anatofuz parents: diff changeset	473 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	474
1d019706d866 LLVM10 anatofuz parents: diff changeset	475 void CloneConstraint::splitCloneGroups(
1d019706d866 LLVM10 anatofuz parents: diff changeset	476 std::vector<CloneDetector::CloneGroup> &CloneGroups,
1d019706d866 LLVM10 anatofuz parents: diff changeset	477 llvm::function_ref<bool(const StmtSequence &, const StmtSequence &)>
1d019706d866 LLVM10 anatofuz parents: diff changeset	478 Compare) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	479 std::vector<CloneDetector::CloneGroup> Result;
1d019706d866 LLVM10 anatofuz parents: diff changeset	480 for (auto &HashGroup : CloneGroups) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	481 // Contains all indexes in HashGroup that were already added to a
1d019706d866 LLVM10 anatofuz parents: diff changeset	482 // CloneGroup.
1d019706d866 LLVM10 anatofuz parents: diff changeset	483 std::vector<char> Indexes;
1d019706d866 LLVM10 anatofuz parents: diff changeset	484 Indexes.resize(HashGroup.size());
1d019706d866 LLVM10 anatofuz parents: diff changeset	485
1d019706d866 LLVM10 anatofuz parents: diff changeset	486 for (unsigned i = 0; i < HashGroup.size(); ++i) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	487 // Skip indexes that are already part of a CloneGroup.
1d019706d866 LLVM10 anatofuz parents: diff changeset	488 if (Indexes[i])
1d019706d866 LLVM10 anatofuz parents: diff changeset	489 continue;
1d019706d866 LLVM10 anatofuz parents: diff changeset	490
1d019706d866 LLVM10 anatofuz parents: diff changeset	491 // Pick the first unhandled StmtSequence and consider it as the
1d019706d866 LLVM10 anatofuz parents: diff changeset	492 // beginning
1d019706d866 LLVM10 anatofuz parents: diff changeset	493 // of a new CloneGroup for now.
1d019706d866 LLVM10 anatofuz parents: diff changeset	494 // We don't add i to Indexes because we never iterate back.
1d019706d866 LLVM10 anatofuz parents: diff changeset	495 StmtSequence Prototype = HashGroup[i];
1d019706d866 LLVM10 anatofuz parents: diff changeset	496 CloneDetector::CloneGroup PotentialGroup = {Prototype};
1d019706d866 LLVM10 anatofuz parents: diff changeset	497 ++Indexes[i];
1d019706d866 LLVM10 anatofuz parents: diff changeset	498
1d019706d866 LLVM10 anatofuz parents: diff changeset	499 // Check all following StmtSequences for clones.
1d019706d866 LLVM10 anatofuz parents: diff changeset	500 for (unsigned j = i + 1; j < HashGroup.size(); ++j) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	501 // Skip indexes that are already part of a CloneGroup.
1d019706d866 LLVM10 anatofuz parents: diff changeset	502 if (Indexes[j])
1d019706d866 LLVM10 anatofuz parents: diff changeset	503 continue;
1d019706d866 LLVM10 anatofuz parents: diff changeset	504
1d019706d866 LLVM10 anatofuz parents: diff changeset	505 // If a following StmtSequence belongs to our CloneGroup, we add it.
1d019706d866 LLVM10 anatofuz parents: diff changeset	506 const StmtSequence &Candidate = HashGroup[j];
1d019706d866 LLVM10 anatofuz parents: diff changeset	507
1d019706d866 LLVM10 anatofuz parents: diff changeset	508 if (!Compare(Prototype, Candidate))
1d019706d866 LLVM10 anatofuz parents: diff changeset	509 continue;
1d019706d866 LLVM10 anatofuz parents: diff changeset	510
1d019706d866 LLVM10 anatofuz parents: diff changeset	511 PotentialGroup.push_back(Candidate);
1d019706d866 LLVM10 anatofuz parents: diff changeset	512 // Make sure we never visit this StmtSequence again.
1d019706d866 LLVM10 anatofuz parents: diff changeset	513 ++Indexes[j];
1d019706d866 LLVM10 anatofuz parents: diff changeset	514 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	515
1d019706d866 LLVM10 anatofuz parents: diff changeset	516 // Otherwise, add it to the result and continue searching for more
1d019706d866 LLVM10 anatofuz parents: diff changeset	517 // groups.
1d019706d866 LLVM10 anatofuz parents: diff changeset	518 Result.push_back(PotentialGroup);
1d019706d866 LLVM10 anatofuz parents: diff changeset	519 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	520
1d019706d866 LLVM10 anatofuz parents: diff changeset	521 assert(llvm::all_of(Indexes, [](char c) { return c == 1; }));
1d019706d866 LLVM10 anatofuz parents: diff changeset	522 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	523 CloneGroups = Result;
1d019706d866 LLVM10 anatofuz parents: diff changeset	524 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	525
1d019706d866 LLVM10 anatofuz parents: diff changeset	526 void VariablePattern::addVariableOccurence(const VarDecl *VarDecl,
1d019706d866 LLVM10 anatofuz parents: diff changeset	527 const Stmt *Mention) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	528 // First check if we already reference this variable
1d019706d866 LLVM10 anatofuz parents: diff changeset	529 for (size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	530 if (Variables[KindIndex] == VarDecl) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	531 // If yes, add a new occurrence that points to the existing entry in
1d019706d866 LLVM10 anatofuz parents: diff changeset	532 // the Variables vector.
1d019706d866 LLVM10 anatofuz parents: diff changeset	533 Occurences.emplace_back(KindIndex, Mention);
1d019706d866 LLVM10 anatofuz parents: diff changeset	534 return;
1d019706d866 LLVM10 anatofuz parents: diff changeset	535 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	536 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	537 // If this variable wasn't already referenced, add it to the list of
1d019706d866 LLVM10 anatofuz parents: diff changeset	538 // referenced variables and add a occurrence that points to this new entry.
1d019706d866 LLVM10 anatofuz parents: diff changeset	539 Occurences.emplace_back(Variables.size(), Mention);
1d019706d866 LLVM10 anatofuz parents: diff changeset	540 Variables.push_back(VarDecl);
1d019706d866 LLVM10 anatofuz parents: diff changeset	541 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	542
1d019706d866 LLVM10 anatofuz parents: diff changeset	543 void VariablePattern::addVariables(const Stmt *S) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	544 // Sometimes we get a nullptr (such as from IfStmts which often have nullptr
1d019706d866 LLVM10 anatofuz parents: diff changeset	545 // children). We skip such statements as they don't reference any
1d019706d866 LLVM10 anatofuz parents: diff changeset	546 // variables.
1d019706d866 LLVM10 anatofuz parents: diff changeset	547 if (!S)
1d019706d866 LLVM10 anatofuz parents: diff changeset	548 return;
1d019706d866 LLVM10 anatofuz parents: diff changeset	549
1d019706d866 LLVM10 anatofuz parents: diff changeset	550 // Check if S is a reference to a variable. If yes, add it to the pattern.
1d019706d866 LLVM10 anatofuz parents: diff changeset	551 if (auto D = dyn_cast<DeclRefExpr>(S)) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	552 if (auto VD = dyn_cast<VarDecl>(D->getDecl()->getCanonicalDecl()))
1d019706d866 LLVM10 anatofuz parents: diff changeset	553 addVariableOccurence(VD, D);
1d019706d866 LLVM10 anatofuz parents: diff changeset	554 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	555
1d019706d866 LLVM10 anatofuz parents: diff changeset	556 // Recursively check all children of the given statement.
1d019706d866 LLVM10 anatofuz parents: diff changeset	557 for (const Stmt *Child : S->children()) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	558 addVariables(Child);
1d019706d866 LLVM10 anatofuz parents: diff changeset	559 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	560 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	561
1d019706d866 LLVM10 anatofuz parents: diff changeset	562 unsigned VariablePattern::countPatternDifferences(
1d019706d866 LLVM10 anatofuz parents: diff changeset	563 const VariablePattern &Other,
1d019706d866 LLVM10 anatofuz parents: diff changeset	564 VariablePattern::SuspiciousClonePair *FirstMismatch) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	565 unsigned NumberOfDifferences = 0;
1d019706d866 LLVM10 anatofuz parents: diff changeset	566
1d019706d866 LLVM10 anatofuz parents: diff changeset	567 assert(Other.Occurences.size() == Occurences.size());
1d019706d866 LLVM10 anatofuz parents: diff changeset	568 for (unsigned i = 0; i < Occurences.size(); ++i) {
1d019706d866 LLVM10 anatofuz parents: diff changeset	569 auto ThisOccurence = Occurences[i];
1d019706d866 LLVM10 anatofuz parents: diff changeset	570 auto OtherOccurence = Other.Occurences[i];
1d019706d866 LLVM10 anatofuz parents: diff changeset	571 if (ThisOccurence.KindID == OtherOccurence.KindID)
1d019706d866 LLVM10 anatofuz parents: diff changeset	572 continue;
1d019706d866 LLVM10 anatofuz parents: diff changeset	573
1d019706d866 LLVM10 anatofuz parents: diff changeset	574 ++NumberOfDifferences;
1d019706d866 LLVM10 anatofuz parents: diff changeset	575
1d019706d866 LLVM10 anatofuz parents: diff changeset	576 // If FirstMismatch is not a nullptr, we need to store information about
1d019706d866 LLVM10 anatofuz parents: diff changeset	577 // the first difference between the two patterns.
1d019706d866 LLVM10 anatofuz parents: diff changeset	578 if (FirstMismatch == nullptr)
1d019706d866 LLVM10 anatofuz parents: diff changeset	579 continue;
1d019706d866 LLVM10 anatofuz parents: diff changeset	580
1d019706d866 LLVM10 anatofuz parents: diff changeset	581 // Only proceed if we just found the first difference as we only store
1d019706d866 LLVM10 anatofuz parents: diff changeset	582 // information about the first difference.
1d019706d866 LLVM10 anatofuz parents: diff changeset	583 if (NumberOfDifferences != 1)
1d019706d866 LLVM10 anatofuz parents: diff changeset	584 continue;
1d019706d866 LLVM10 anatofuz parents: diff changeset	585
1d019706d866 LLVM10 anatofuz parents: diff changeset	586 const VarDecl *FirstSuggestion = nullptr;
1d019706d866 LLVM10 anatofuz parents: diff changeset	587 // If there is a variable available in the list of referenced variables
1d019706d866 LLVM10 anatofuz parents: diff changeset	588 // which wouldn't break the pattern if it is used in place of the
1d019706d866 LLVM10 anatofuz parents: diff changeset	589 // current variable, we provide this variable as the suggested fix.
1d019706d866 LLVM10 anatofuz parents: diff changeset	590 if (OtherOccurence.KindID < Variables.size())
1d019706d866 LLVM10 anatofuz parents: diff changeset	591 FirstSuggestion = Variables[OtherOccurence.KindID];
1d019706d866 LLVM10 anatofuz parents: diff changeset	592
1d019706d866 LLVM10 anatofuz parents: diff changeset	593 // Store information about the first clone.
1d019706d866 LLVM10 anatofuz parents: diff changeset	594 FirstMismatch->FirstCloneInfo =
1d019706d866 LLVM10 anatofuz parents: diff changeset	595 VariablePattern::SuspiciousClonePair::SuspiciousCloneInfo(
1d019706d866 LLVM10 anatofuz parents: diff changeset	596 Variables[ThisOccurence.KindID], ThisOccurence.Mention,
1d019706d866 LLVM10 anatofuz parents: diff changeset	597 FirstSuggestion);
1d019706d866 LLVM10 anatofuz parents: diff changeset	598
1d019706d866 LLVM10 anatofuz parents: diff changeset	599 // Same as above but with the other clone. We do this for both clones as
1d019706d866 LLVM10 anatofuz parents: diff changeset	600 // we don't know which clone is the one containing the unintended
1d019706d866 LLVM10 anatofuz parents: diff changeset	601 // pattern error.
1d019706d866 LLVM10 anatofuz parents: diff changeset	602 const VarDecl *SecondSuggestion = nullptr;
1d019706d866 LLVM10 anatofuz parents: diff changeset	603 if (ThisOccurence.KindID < Other.Variables.size())
1d019706d866 LLVM10 anatofuz parents: diff changeset	604 SecondSuggestion = Other.Variables[ThisOccurence.KindID];
1d019706d866 LLVM10 anatofuz parents: diff changeset	605
1d019706d866 LLVM10 anatofuz parents: diff changeset	606 // Store information about the second clone.
1d019706d866 LLVM10 anatofuz parents: diff changeset	607 FirstMismatch->SecondCloneInfo =
1d019706d866 LLVM10 anatofuz parents: diff changeset	608 VariablePattern::SuspiciousClonePair::SuspiciousCloneInfo(
1d019706d866 LLVM10 anatofuz parents: diff changeset	609 Other.Variables[OtherOccurence.KindID], OtherOccurence.Mention,
1d019706d866 LLVM10 anatofuz parents: diff changeset	610 SecondSuggestion);
1d019706d866 LLVM10 anatofuz parents: diff changeset	611
1d019706d866 LLVM10 anatofuz parents: diff changeset	612 // SuspiciousClonePair guarantees that the first clone always has a
1d019706d866 LLVM10 anatofuz parents: diff changeset	613 // suggested variable associated with it. As we know that one of the two
1d019706d866 LLVM10 anatofuz parents: diff changeset	614 // clones in the pair always has suggestion, we swap the two clones
1d019706d866 LLVM10 anatofuz parents: diff changeset	615 // in case the first clone has no suggested variable which means that
1d019706d866 LLVM10 anatofuz parents: diff changeset	616 // the second clone has a suggested variable and should be first.
1d019706d866 LLVM10 anatofuz parents: diff changeset	617 if (!FirstMismatch->FirstCloneInfo.Suggestion)
1d019706d866 LLVM10 anatofuz parents: diff changeset	618 std::swap(FirstMismatch->FirstCloneInfo, FirstMismatch->SecondCloneInfo);
1d019706d866 LLVM10 anatofuz parents: diff changeset	619
1d019706d866 LLVM10 anatofuz parents: diff changeset	620 // This ensures that we always have at least one suggestion in a pair.
1d019706d866 LLVM10 anatofuz parents: diff changeset	621 assert(FirstMismatch->FirstCloneInfo.Suggestion);
1d019706d866 LLVM10 anatofuz parents: diff changeset	622 }
1d019706d866 LLVM10 anatofuz parents: diff changeset	623
1d019706d866 LLVM10 anatofuz parents: diff changeset	624 return NumberOfDifferences;
1d019706d866 LLVM10 anatofuz parents: diff changeset	625 }

Mercurial > hg > CbC > CbC_llvm

annotate clang/lib/Analysis/CloneDetection.cpp @ 176:de4ac79aef9d