150
|
1 //===--- Selection.cpp ----------------------------------------------------===//
|
|
2 //
|
|
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
4 // See https://llvm.org/LICENSE.txt for license information.
|
|
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
6 //
|
|
7 //===----------------------------------------------------------------------===//
|
|
8
|
|
9 #include "Selection.h"
|
|
10 #include "SourceCode.h"
|
173
|
11 #include "support/Logger.h"
|
|
12 #include "support/Trace.h"
|
150
|
13 #include "clang/AST/ASTTypeTraits.h"
|
173
|
14 #include "clang/AST/Decl.h"
|
150
|
15 #include "clang/AST/DeclCXX.h"
|
|
16 #include "clang/AST/Expr.h"
|
|
17 #include "clang/AST/ExprCXX.h"
|
|
18 #include "clang/AST/PrettyPrinter.h"
|
|
19 #include "clang/AST/RecursiveASTVisitor.h"
|
|
20 #include "clang/AST/TypeLoc.h"
|
|
21 #include "clang/Basic/OperatorKinds.h"
|
|
22 #include "clang/Basic/SourceLocation.h"
|
|
23 #include "clang/Basic/SourceManager.h"
|
|
24 #include "clang/Basic/TokenKinds.h"
|
|
25 #include "clang/Lex/Lexer.h"
|
|
26 #include "clang/Tooling/Syntax/Tokens.h"
|
|
27 #include "llvm/ADT/STLExtras.h"
|
173
|
28 #include "llvm/ADT/StringExtras.h"
|
150
|
29 #include "llvm/Support/Casting.h"
|
|
30 #include "llvm/Support/raw_ostream.h"
|
|
31 #include <algorithm>
|
|
32 #include <string>
|
|
33
|
|
34 namespace clang {
|
|
35 namespace clangd {
|
|
36 namespace {
|
|
37 using Node = SelectionTree::Node;
|
|
38 using ast_type_traits::DynTypedNode;
|
|
39
|
173
|
40 // Measure the fraction of selections that were enabled by recovery AST.
|
|
41 void recordMetrics(const SelectionTree &S) {
|
|
42 static constexpr trace::Metric SelectionUsedRecovery(
|
|
43 "selection_recovery", trace::Metric::Distribution);
|
|
44 const auto *Common = S.commonAncestor();
|
|
45 for (const auto *N = Common; N; N = N->Parent) {
|
|
46 if (N->ASTNode.get<RecoveryExpr>()) {
|
|
47 SelectionUsedRecovery.record(1); // used recovery ast.
|
|
48 return;
|
|
49 }
|
|
50 }
|
|
51 if (Common)
|
|
52 SelectionUsedRecovery.record(0); // unused.
|
|
53 }
|
|
54
|
150
|
55 // An IntervalSet maintains a set of disjoint subranges of an array.
|
|
56 //
|
|
57 // Initially, it contains the entire array.
|
|
58 // [-----------------------------------------------------------]
|
|
59 //
|
|
60 // When a range is erased(), it will typically split the array in two.
|
|
61 // Claim: [--------------------]
|
|
62 // after: [----------------] [-------------------]
|
|
63 //
|
|
64 // erase() returns the segments actually erased. Given the state above:
|
|
65 // Claim: [---------------------------------------]
|
|
66 // Out: [---------] [------]
|
|
67 // After: [-----] [-----------]
|
|
68 //
|
|
69 // It is used to track (expanded) tokens not yet associated with an AST node.
|
|
70 // On traversing an AST node, its token range is erased from the unclaimed set.
|
|
71 // The tokens actually removed are associated with that node, and hit-tested
|
|
72 // against the selection to determine whether the node is selected.
|
173
|
73 template <typename T> class IntervalSet {
|
150
|
74 public:
|
|
75 IntervalSet(llvm::ArrayRef<T> Range) { UnclaimedRanges.insert(Range); }
|
|
76
|
|
77 // Removes the elements of Claim from the set, modifying or removing ranges
|
|
78 // that overlap it.
|
|
79 // Returns the continuous subranges of Claim that were actually removed.
|
|
80 llvm::SmallVector<llvm::ArrayRef<T>, 4> erase(llvm::ArrayRef<T> Claim) {
|
|
81 llvm::SmallVector<llvm::ArrayRef<T>, 4> Out;
|
|
82 if (Claim.empty())
|
|
83 return Out;
|
|
84
|
|
85 // General case:
|
|
86 // Claim: [-----------------]
|
|
87 // UnclaimedRanges: [-A-] [-B-] [-C-] [-D-] [-E-] [-F-] [-G-]
|
|
88 // Overlap: ^first ^second
|
|
89 // Ranges C and D are fully included. Ranges B and E must be trimmed.
|
|
90 auto Overlap = std::make_pair(
|
|
91 UnclaimedRanges.lower_bound({Claim.begin(), Claim.begin()}), // C
|
|
92 UnclaimedRanges.lower_bound({Claim.end(), Claim.end()})); // F
|
|
93 // Rewind to cover B.
|
|
94 if (Overlap.first != UnclaimedRanges.begin()) {
|
|
95 --Overlap.first;
|
|
96 // ...unless B isn't selected at all.
|
|
97 if (Overlap.first->end() <= Claim.begin())
|
173
|
98 ++Overlap.first;
|
150
|
99 }
|
|
100 if (Overlap.first == Overlap.second)
|
|
101 return Out;
|
|
102
|
|
103 // First, copy all overlapping ranges into the output.
|
|
104 auto OutFirst = Out.insert(Out.end(), Overlap.first, Overlap.second);
|
|
105 // If any of the overlapping ranges were sliced by the claim, split them:
|
|
106 // - restrict the returned range to the claimed part
|
|
107 // - save the unclaimed part so it can be reinserted
|
|
108 llvm::ArrayRef<T> RemainingHead, RemainingTail;
|
|
109 if (Claim.begin() > OutFirst->begin()) {
|
|
110 RemainingHead = {OutFirst->begin(), Claim.begin()};
|
|
111 *OutFirst = {Claim.begin(), OutFirst->end()};
|
|
112 }
|
|
113 if (Claim.end() < Out.back().end()) {
|
|
114 RemainingTail = {Claim.end(), Out.back().end()};
|
|
115 Out.back() = {Out.back().begin(), Claim.end()};
|
|
116 }
|
|
117
|
|
118 // Erase all the overlapping ranges (invalidating all iterators).
|
|
119 UnclaimedRanges.erase(Overlap.first, Overlap.second);
|
|
120 // Reinsert ranges that were merely trimmed.
|
|
121 if (!RemainingHead.empty())
|
|
122 UnclaimedRanges.insert(RemainingHead);
|
|
123 if (!RemainingTail.empty())
|
|
124 UnclaimedRanges.insert(RemainingTail);
|
|
125
|
|
126 return Out;
|
|
127 }
|
|
128
|
|
129 private:
|
|
130 using TokenRange = llvm::ArrayRef<T>;
|
|
131 struct RangeLess {
|
|
132 bool operator()(llvm::ArrayRef<T> L, llvm::ArrayRef<T> R) const {
|
|
133 return L.begin() < R.begin();
|
|
134 }
|
|
135 };
|
|
136
|
|
137 // Disjoint sorted unclaimed ranges of expanded tokens.
|
173
|
138 std::set<llvm::ArrayRef<T>, RangeLess> UnclaimedRanges;
|
150
|
139 };
|
|
140
|
|
141 // Sentinel value for the selectedness of a node where we've seen no tokens yet.
|
|
142 // This resolves to Unselected if no tokens are ever seen.
|
|
143 // But Unselected + Complete -> Partial, while NoTokens + Complete --> Complete.
|
|
144 // This value is never exposed publicly.
|
|
145 constexpr SelectionTree::Selection NoTokens =
|
|
146 static_cast<SelectionTree::Selection>(
|
|
147 static_cast<unsigned char>(SelectionTree::Complete + 1));
|
|
148
|
|
149 // Nodes start with NoTokens, and then use this function to aggregate the
|
|
150 // selectedness as more tokens are found.
|
|
151 void update(SelectionTree::Selection &Result, SelectionTree::Selection New) {
|
|
152 if (New == NoTokens)
|
|
153 return;
|
|
154 if (Result == NoTokens)
|
|
155 Result = New;
|
|
156 else if (Result != New)
|
|
157 // Can only be completely selected (or unselected) if all tokens are.
|
|
158 Result = SelectionTree::Partial;
|
|
159 }
|
|
160
|
173
|
161 // As well as comments, don't count semicolons as real tokens.
|
|
162 // They're not properly claimed as expr-statement is missing from the AST.
|
|
163 bool shouldIgnore(const syntax::Token &Tok) {
|
|
164 return Tok.kind() == tok::comment || Tok.kind() == tok::semi;
|
|
165 }
|
|
166
|
|
167 // Determine whether 'Target' is the first expansion of the macro
|
|
168 // argument whose top-level spelling location is 'SpellingLoc'.
|
|
169 bool isFirstExpansion(FileID Target, SourceLocation SpellingLoc,
|
|
170 const SourceManager &SM) {
|
|
171 SourceLocation Prev = SpellingLoc;
|
|
172 while (true) {
|
|
173 // If the arg is expanded multiple times, getMacroArgExpandedLocation()
|
|
174 // returns the first expansion.
|
|
175 SourceLocation Next = SM.getMacroArgExpandedLocation(Prev);
|
|
176 // So if we reach the target, target is the first-expansion of the
|
|
177 // first-expansion ...
|
|
178 if (SM.getFileID(Next) == Target)
|
|
179 return true;
|
|
180
|
|
181 // Otherwise, if the FileID stops changing, we've reached the innermost
|
|
182 // macro expansion, and Target was on a different branch.
|
|
183 if (SM.getFileID(Next) == SM.getFileID(Prev))
|
|
184 return false;
|
|
185
|
|
186 Prev = Next;
|
|
187 }
|
|
188 return false;
|
|
189 }
|
150
|
190
|
|
191 // SelectionTester can determine whether a range of tokens from the PP-expanded
|
|
192 // stream (corresponding to an AST node) is considered selected.
|
|
193 //
|
|
194 // When the tokens result from macro expansions, the appropriate tokens in the
|
|
195 // main file are examined (macro invocation or args). Similarly for #includes.
|
173
|
196 // However, only the first expansion of a given spelled token is considered
|
|
197 // selected.
|
150
|
198 //
|
|
199 // It tests each token in the range (not just the endpoints) as contiguous
|
|
200 // expanded tokens may not have contiguous spellings (with macros).
|
|
201 //
|
|
202 // Non-token text, and tokens not modeled in the AST (comments, semicolons)
|
|
203 // are ignored when determining selectedness.
|
|
204 class SelectionTester {
|
|
205 public:
|
|
206 // The selection is offsets [SelBegin, SelEnd) in SelFile.
|
|
207 SelectionTester(const syntax::TokenBuffer &Buf, FileID SelFile,
|
|
208 unsigned SelBegin, unsigned SelEnd, const SourceManager &SM)
|
|
209 : SelFile(SelFile), SM(SM) {
|
|
210 // Find all tokens (partially) selected in the file.
|
|
211 auto AllSpelledTokens = Buf.spelledTokens(SelFile);
|
|
212 const syntax::Token *SelFirst =
|
|
213 llvm::partition_point(AllSpelledTokens, [&](const syntax::Token &Tok) {
|
|
214 return SM.getFileOffset(Tok.endLocation()) <= SelBegin;
|
|
215 });
|
|
216 const syntax::Token *SelLimit = std::partition_point(
|
|
217 SelFirst, AllSpelledTokens.end(), [&](const syntax::Token &Tok) {
|
|
218 return SM.getFileOffset(Tok.location()) < SelEnd;
|
|
219 });
|
|
220 // Precompute selectedness and offset for selected spelled tokens.
|
|
221 for (const syntax::Token *T = SelFirst; T < SelLimit; ++T) {
|
173
|
222 if (shouldIgnore(*T))
|
150
|
223 continue;
|
|
224 SpelledTokens.emplace_back();
|
|
225 Tok &S = SpelledTokens.back();
|
|
226 S.Offset = SM.getFileOffset(T->location());
|
|
227 if (S.Offset >= SelBegin && S.Offset + T->length() <= SelEnd)
|
|
228 S.Selected = SelectionTree::Complete;
|
|
229 else
|
|
230 S.Selected = SelectionTree::Partial;
|
|
231 }
|
|
232 }
|
|
233
|
|
234 // Test whether a consecutive range of tokens is selected.
|
|
235 // The tokens are taken from the expanded token stream.
|
|
236 SelectionTree::Selection
|
|
237 test(llvm::ArrayRef<syntax::Token> ExpandedTokens) const {
|
|
238 if (SpelledTokens.empty())
|
|
239 return NoTokens;
|
|
240 SelectionTree::Selection Result = NoTokens;
|
|
241 while (!ExpandedTokens.empty()) {
|
|
242 // Take consecutive tokens from the same context together for efficiency.
|
|
243 FileID FID = SM.getFileID(ExpandedTokens.front().location());
|
|
244 auto Batch = ExpandedTokens.take_while([&](const syntax::Token &T) {
|
|
245 return SM.getFileID(T.location()) == FID;
|
|
246 });
|
|
247 assert(!Batch.empty());
|
|
248 ExpandedTokens = ExpandedTokens.drop_front(Batch.size());
|
|
249
|
|
250 update(Result, testChunk(FID, Batch));
|
|
251 }
|
|
252 return Result;
|
|
253 }
|
|
254
|
|
255 // Cheap check whether any of the tokens in R might be selected.
|
|
256 // If it returns false, test() will return NoTokens or Unselected.
|
|
257 // If it returns true, test() may return any value.
|
|
258 bool mayHit(SourceRange R) const {
|
|
259 if (SpelledTokens.empty())
|
|
260 return false;
|
|
261 auto B = SM.getDecomposedLoc(R.getBegin());
|
|
262 auto E = SM.getDecomposedLoc(R.getEnd());
|
|
263 if (B.first == SelFile && E.first == SelFile)
|
|
264 if (E.second < SpelledTokens.front().Offset ||
|
|
265 B.second > SpelledTokens.back().Offset)
|
|
266 return false;
|
|
267 return true;
|
|
268 }
|
|
269
|
|
270 private:
|
|
271 // Hit-test a consecutive range of tokens from a single file ID.
|
|
272 SelectionTree::Selection
|
|
273 testChunk(FileID FID, llvm::ArrayRef<syntax::Token> Batch) const {
|
|
274 assert(!Batch.empty());
|
|
275 SourceLocation StartLoc = Batch.front().location();
|
|
276 // There are several possible categories of FileID depending on how the
|
|
277 // preprocessor was used to generate these tokens:
|
|
278 // main file, #included file, macro args, macro bodies.
|
|
279 // We need to identify the main-file tokens that represent Batch, and
|
|
280 // determine whether we want to exclusively claim them. Regular tokens
|
|
281 // represent one AST construct, but a macro invocation can represent many.
|
|
282
|
|
283 // Handle tokens written directly in the main file.
|
|
284 if (FID == SelFile) {
|
|
285 return testTokenRange(SM.getFileOffset(Batch.front().location()),
|
|
286 SM.getFileOffset(Batch.back().location()));
|
|
287 }
|
|
288
|
|
289 // Handle tokens in another file #included into the main file.
|
|
290 // Check if the #include is selected, but don't claim it exclusively.
|
|
291 if (StartLoc.isFileID()) {
|
|
292 for (SourceLocation Loc = Batch.front().location(); Loc.isValid();
|
|
293 Loc = SM.getIncludeLoc(SM.getFileID(Loc))) {
|
|
294 if (SM.getFileID(Loc) == SelFile)
|
|
295 // FIXME: use whole #include directive, not just the filename string.
|
|
296 return testToken(SM.getFileOffset(Loc));
|
|
297 }
|
|
298 return NoTokens;
|
|
299 }
|
|
300
|
|
301 assert(StartLoc.isMacroID());
|
|
302 // Handle tokens that were passed as a macro argument.
|
|
303 SourceLocation ArgStart = SM.getTopMacroCallerLoc(StartLoc);
|
|
304 if (SM.getFileID(ArgStart) == SelFile) {
|
173
|
305 if (isFirstExpansion(FID, ArgStart, SM)) {
|
|
306 SourceLocation ArgEnd =
|
|
307 SM.getTopMacroCallerLoc(Batch.back().location());
|
|
308 return testTokenRange(SM.getFileOffset(ArgStart),
|
|
309 SM.getFileOffset(ArgEnd));
|
|
310 } else {
|
|
311 /* fall through and treat as part of the macro body */
|
|
312 }
|
150
|
313 }
|
|
314
|
|
315 // Handle tokens produced by non-argument macro expansion.
|
|
316 // Check if the macro name is selected, don't claim it exclusively.
|
|
317 auto Expansion = SM.getDecomposedExpansionLoc(StartLoc);
|
|
318 if (Expansion.first == SelFile)
|
|
319 // FIXME: also check ( and ) for function-like macros?
|
|
320 return testToken(Expansion.second);
|
|
321 else
|
|
322 return NoTokens;
|
|
323 }
|
|
324
|
|
325 // Is the closed token range [Begin, End] selected?
|
|
326 SelectionTree::Selection testTokenRange(unsigned Begin, unsigned End) const {
|
|
327 assert(Begin <= End);
|
|
328 // Outside the selection entirely?
|
|
329 if (End < SpelledTokens.front().Offset ||
|
|
330 Begin > SpelledTokens.back().Offset)
|
|
331 return SelectionTree::Unselected;
|
|
332
|
|
333 // Compute range of tokens.
|
|
334 auto B = llvm::partition_point(
|
|
335 SpelledTokens, [&](const Tok &T) { return T.Offset < Begin; });
|
|
336 auto E = std::partition_point(
|
|
337 B, SpelledTokens.end(), [&](const Tok &T) { return T.Offset <= End; });
|
|
338
|
|
339 // Aggregate selectedness of tokens in range.
|
|
340 bool ExtendsOutsideSelection = Begin < SpelledTokens.front().Offset ||
|
|
341 End > SpelledTokens.back().Offset;
|
|
342 SelectionTree::Selection Result =
|
|
343 ExtendsOutsideSelection ? SelectionTree::Unselected : NoTokens;
|
|
344 for (auto It = B; It != E; ++It)
|
|
345 update(Result, It->Selected);
|
|
346 return Result;
|
|
347 }
|
|
348
|
|
349 // Is the token at `Offset` selected?
|
|
350 SelectionTree::Selection testToken(unsigned Offset) const {
|
|
351 // Outside the selection entirely?
|
|
352 if (Offset < SpelledTokens.front().Offset ||
|
|
353 Offset > SpelledTokens.back().Offset)
|
|
354 return SelectionTree::Unselected;
|
|
355 // Find the token, if it exists.
|
|
356 auto It = llvm::partition_point(
|
|
357 SpelledTokens, [&](const Tok &T) { return T.Offset < Offset; });
|
|
358 if (It != SpelledTokens.end() && It->Offset == Offset)
|
|
359 return It->Selected;
|
|
360 return NoTokens;
|
|
361 }
|
|
362
|
|
363 struct Tok {
|
|
364 unsigned Offset;
|
|
365 SelectionTree::Selection Selected;
|
|
366 };
|
|
367 std::vector<Tok> SpelledTokens;
|
|
368 FileID SelFile;
|
|
369 const SourceManager &SM;
|
|
370 };
|
|
371
|
|
372 // Show the type of a node for debugging.
|
|
373 void printNodeKind(llvm::raw_ostream &OS, const DynTypedNode &N) {
|
|
374 if (const TypeLoc *TL = N.get<TypeLoc>()) {
|
|
375 // TypeLoc is a hierarchy, but has only a single ASTNodeKind.
|
|
376 // Synthesize the name from the Type subclass (except for QualifiedTypeLoc).
|
|
377 if (TL->getTypeLocClass() == TypeLoc::Qualified)
|
|
378 OS << "QualifiedTypeLoc";
|
|
379 else
|
|
380 OS << TL->getType()->getTypeClassName() << "TypeLoc";
|
|
381 } else {
|
|
382 OS << N.getNodeKind().asStringRef();
|
|
383 }
|
|
384 }
|
|
385
|
|
386 #ifndef NDEBUG
|
|
387 std::string printNodeToString(const DynTypedNode &N, const PrintingPolicy &PP) {
|
|
388 std::string S;
|
|
389 llvm::raw_string_ostream OS(S);
|
|
390 printNodeKind(OS, N);
|
|
391 OS << " ";
|
|
392 return std::move(OS.str());
|
|
393 }
|
|
394 #endif
|
|
395
|
173
|
396 bool isImplicit(const Stmt *S) {
|
150
|
397 // Some Stmts are implicit and shouldn't be traversed, but there's no
|
|
398 // "implicit" attribute on Stmt/Expr.
|
|
399 // Unwrap implicit casts first if present (other nodes too?).
|
|
400 if (auto *ICE = llvm::dyn_cast<ImplicitCastExpr>(S))
|
|
401 S = ICE->getSubExprAsWritten();
|
|
402 // Implicit this in a MemberExpr is not filtered out by RecursiveASTVisitor.
|
|
403 // It would be nice if RAV handled this (!shouldTraverseImplicitCode()).
|
|
404 if (auto *CTI = llvm::dyn_cast<CXXThisExpr>(S))
|
|
405 if (CTI->isImplicit())
|
|
406 return true;
|
|
407 // Refs to operator() and [] are (almost?) always implicit as part of calls.
|
|
408 if (auto *DRE = llvm::dyn_cast<DeclRefExpr>(S)) {
|
|
409 if (auto *FD = llvm::dyn_cast<FunctionDecl>(DRE->getDecl())) {
|
|
410 switch (FD->getOverloadedOperator()) {
|
|
411 case OO_Call:
|
|
412 case OO_Subscript:
|
|
413 return true;
|
|
414 default:
|
|
415 break;
|
|
416 }
|
|
417 }
|
|
418 }
|
|
419 return false;
|
|
420 }
|
|
421
|
|
422 // We find the selection by visiting written nodes in the AST, looking for nodes
|
|
423 // that intersect with the selected character range.
|
|
424 //
|
|
425 // While traversing, we maintain a parent stack. As nodes pop off the stack,
|
|
426 // we decide whether to keep them or not. To be kept, they must either be
|
|
427 // selected or contain some nodes that are.
|
|
428 //
|
|
429 // For simple cases (not inside macros) we prune subtrees that don't intersect.
|
|
430 class SelectionVisitor : public RecursiveASTVisitor<SelectionVisitor> {
|
|
431 public:
|
|
432 // Runs the visitor to gather selected nodes and their ancestors.
|
|
433 // If there is any selection, the root (TUDecl) is the first node.
|
|
434 static std::deque<Node> collect(ASTContext &AST,
|
|
435 const syntax::TokenBuffer &Tokens,
|
|
436 const PrintingPolicy &PP, unsigned Begin,
|
|
437 unsigned End, FileID File) {
|
|
438 SelectionVisitor V(AST, Tokens, PP, Begin, End, File);
|
|
439 V.TraverseAST(AST);
|
|
440 assert(V.Stack.size() == 1 && "Unpaired push/pop?");
|
|
441 assert(V.Stack.top() == &V.Nodes.front());
|
|
442 return std::move(V.Nodes);
|
|
443 }
|
|
444
|
|
445 // We traverse all "well-behaved" nodes the same way:
|
|
446 // - push the node onto the stack
|
|
447 // - traverse its children recursively
|
|
448 // - pop it from the stack
|
|
449 // - hit testing: is intersection(node, selection) - union(children) empty?
|
|
450 // - attach it to the tree if it or any children hit the selection
|
|
451 //
|
|
452 // Two categories of nodes are not "well-behaved":
|
|
453 // - those without source range information, we don't record those
|
|
454 // - those that can't be stored in DynTypedNode.
|
|
455 // We're missing some interesting things like Attr due to the latter.
|
|
456 bool TraverseDecl(Decl *X) {
|
|
457 if (X && isa<TranslationUnitDecl>(X))
|
|
458 return Base::TraverseDecl(X); // Already pushed by constructor.
|
|
459 // Base::TraverseDecl will suppress children, but not this node itself.
|
|
460 if (X && X->isImplicit())
|
|
461 return true;
|
|
462 return traverseNode(X, [&] { return Base::TraverseDecl(X); });
|
|
463 }
|
|
464 bool TraverseTypeLoc(TypeLoc X) {
|
|
465 return traverseNode(&X, [&] { return Base::TraverseTypeLoc(X); });
|
|
466 }
|
|
467 bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc X) {
|
|
468 return traverseNode(
|
|
469 &X, [&] { return Base::TraverseNestedNameSpecifierLoc(X); });
|
|
470 }
|
|
471 bool TraverseConstructorInitializer(CXXCtorInitializer *X) {
|
|
472 return traverseNode(
|
|
473 X, [&] { return Base::TraverseConstructorInitializer(X); });
|
|
474 }
|
|
475 // Stmt is the same, but this form allows the data recursion optimization.
|
|
476 bool dataTraverseStmtPre(Stmt *X) {
|
|
477 if (!X || isImplicit(X))
|
|
478 return false;
|
|
479 auto N = DynTypedNode::create(*X);
|
|
480 if (canSafelySkipNode(N))
|
|
481 return false;
|
|
482 push(std::move(N));
|
|
483 if (shouldSkipChildren(X)) {
|
|
484 pop();
|
|
485 return false;
|
|
486 }
|
|
487 return true;
|
|
488 }
|
|
489 bool dataTraverseStmtPost(Stmt *X) {
|
|
490 pop();
|
|
491 return true;
|
|
492 }
|
|
493 // QualifiedTypeLoc is handled strangely in RecursiveASTVisitor: the derived
|
|
494 // TraverseTypeLoc is not called for the inner UnqualTypeLoc.
|
|
495 // This means we'd never see 'int' in 'const int'! Work around that here.
|
|
496 // (The reason for the behavior is to avoid traversing the nested Type twice,
|
|
497 // but we ignore TraverseType anyway).
|
|
498 bool TraverseQualifiedTypeLoc(QualifiedTypeLoc QX) {
|
|
499 return traverseNode<TypeLoc>(
|
|
500 &QX, [&] { return TraverseTypeLoc(QX.getUnqualifiedLoc()); });
|
|
501 }
|
|
502 // Uninteresting parts of the AST that don't have locations within them.
|
|
503 bool TraverseNestedNameSpecifier(NestedNameSpecifier *) { return true; }
|
|
504 bool TraverseType(QualType) { return true; }
|
|
505
|
|
506 // The DeclStmt for the loop variable claims to cover the whole range
|
|
507 // inside the parens, this causes the range-init expression to not be hit.
|
|
508 // Traverse the loop VarDecl instead, which has the right source range.
|
|
509 bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) {
|
|
510 return traverseNode(S, [&] {
|
|
511 return TraverseStmt(S->getInit()) && TraverseDecl(S->getLoopVariable()) &&
|
|
512 TraverseStmt(S->getRangeInit()) && TraverseStmt(S->getBody());
|
|
513 });
|
|
514 }
|
|
515 // OpaqueValueExpr blocks traversal, we must explicitly traverse it.
|
|
516 bool TraverseOpaqueValueExpr(OpaqueValueExpr *E) {
|
|
517 return traverseNode(E, [&] { return TraverseStmt(E->getSourceExpr()); });
|
|
518 }
|
|
519 // We only want to traverse the *syntactic form* to understand the selection.
|
|
520 bool TraversePseudoObjectExpr(PseudoObjectExpr *E) {
|
|
521 return traverseNode(E, [&] { return TraverseStmt(E->getSyntacticForm()); });
|
|
522 }
|
|
523
|
|
524 private:
|
|
525 using Base = RecursiveASTVisitor<SelectionVisitor>;
|
|
526
|
|
527 SelectionVisitor(ASTContext &AST, const syntax::TokenBuffer &Tokens,
|
|
528 const PrintingPolicy &PP, unsigned SelBegin, unsigned SelEnd,
|
|
529 FileID SelFile)
|
|
530 : SM(AST.getSourceManager()), LangOpts(AST.getLangOpts()),
|
|
531 #ifndef NDEBUG
|
|
532 PrintPolicy(PP),
|
|
533 #endif
|
|
534 TokenBuf(Tokens), SelChecker(Tokens, SelFile, SelBegin, SelEnd, SM),
|
|
535 UnclaimedExpandedTokens(Tokens.expandedTokens()) {
|
|
536 // Ensure we have a node for the TU decl, regardless of traversal scope.
|
|
537 Nodes.emplace_back();
|
|
538 Nodes.back().ASTNode = DynTypedNode::create(*AST.getTranslationUnitDecl());
|
|
539 Nodes.back().Parent = nullptr;
|
|
540 Nodes.back().Selected = SelectionTree::Unselected;
|
|
541 Stack.push(&Nodes.back());
|
|
542 }
|
|
543
|
|
544 // Generic case of TraverseFoo. Func should be the call to Base::TraverseFoo.
|
|
545 // Node is always a pointer so the generic code can handle any null checks.
|
|
546 template <typename T, typename Func>
|
|
547 bool traverseNode(T *Node, const Func &Body) {
|
|
548 if (Node == nullptr)
|
|
549 return true;
|
|
550 auto N = DynTypedNode::create(*Node);
|
|
551 if (canSafelySkipNode(N))
|
|
552 return true;
|
|
553 push(DynTypedNode::create(*Node));
|
|
554 bool Ret = Body();
|
|
555 pop();
|
|
556 return Ret;
|
|
557 }
|
|
558
|
|
559 // HIT TESTING
|
|
560 //
|
|
561 // We do rough hit testing on the way down the tree to avoid traversing
|
|
562 // subtrees that don't touch the selection (canSafelySkipNode), but
|
|
563 // fine-grained hit-testing is mostly done on the way back up (in pop()).
|
|
564 // This means children get to claim parts of the selection first, and parents
|
|
565 // are only selected if they own tokens that no child owned.
|
|
566 //
|
|
567 // Nodes *usually* nest nicely: a child's getSourceRange() lies within the
|
|
568 // parent's, and a node (transitively) owns all tokens in its range.
|
|
569 //
|
|
570 // Exception 1: child range claims tokens that should be owned by the parent.
|
|
571 // e.g. in `void foo(int);`, the FunctionTypeLoc should own
|
|
572 // `void (int)` but the parent FunctionDecl should own `foo`.
|
|
573 // To handle this case, certain nodes claim small token ranges *before*
|
|
574 // their children are traversed. (see earlySourceRange).
|
|
575 //
|
|
576 // Exception 2: siblings both claim the same node.
|
|
577 // e.g. `int x, y;` produces two sibling VarDecls.
|
|
578 // ~~~~~ x
|
|
579 // ~~~~~~~~ y
|
|
580 // Here the first ("leftmost") sibling claims the tokens it wants, and the
|
|
581 // other sibling gets what's left. So selecting "int" only includes the left
|
|
582 // VarDecl in the selection tree.
|
|
583
|
|
584 // An optimization for a common case: nodes outside macro expansions that
|
|
585 // don't intersect the selection may be recursively skipped.
|
|
586 bool canSafelySkipNode(const DynTypedNode &N) {
|
|
587 SourceRange S = N.getSourceRange();
|
|
588 if (auto *TL = N.get<TypeLoc>()) {
|
|
589 // DeclTypeTypeLoc::getSourceRange() is incomplete, which would lead to
|
|
590 // failing
|
|
591 // to descend into the child expression.
|
|
592 // decltype(2+2);
|
|
593 // ~~~~~~~~~~~~~ <-- correct range
|
|
594 // ~~~~~~~~ <-- range reported by getSourceRange()
|
|
595 // ~~~~~~~~~~~~ <-- range with this hack(i.e, missing closing paren)
|
|
596 // FIXME: Alter DecltypeTypeLoc to contain parentheses locations and get
|
|
597 // rid of this patch.
|
|
598 if (auto DT = TL->getAs<DecltypeTypeLoc>())
|
|
599 S.setEnd(DT.getUnderlyingExpr()->getEndLoc());
|
|
600 }
|
|
601 if (!SelChecker.mayHit(S)) {
|
|
602 dlog("{1}skip: {0}", printNodeToString(N, PrintPolicy), indent());
|
|
603 dlog("{1}skipped range = {0}", S.printToString(SM), indent(1));
|
|
604 return true;
|
|
605 }
|
|
606 return false;
|
|
607 }
|
|
608
|
|
609 // There are certain nodes we want to treat as leaves in the SelectionTree,
|
|
610 // although they do have children.
|
|
611 bool shouldSkipChildren(const Stmt *X) const {
|
|
612 // UserDefinedLiteral (e.g. 12_i) has two children (12 and _i).
|
|
613 // Unfortunately TokenBuffer sees 12_i as one token and can't split it.
|
|
614 // So we treat UserDefinedLiteral as a leaf node, owning the token.
|
|
615 return llvm::isa<UserDefinedLiteral>(X);
|
|
616 }
|
|
617
|
|
618 // Pushes a node onto the ancestor stack. Pairs with pop().
|
|
619 // Performs early hit detection for some nodes (on the earlySourceRange).
|
|
620 void push(DynTypedNode Node) {
|
|
621 SourceRange Early = earlySourceRange(Node);
|
|
622 dlog("{1}push: {0}", printNodeToString(Node, PrintPolicy), indent());
|
|
623 Nodes.emplace_back();
|
|
624 Nodes.back().ASTNode = std::move(Node);
|
|
625 Nodes.back().Parent = Stack.top();
|
|
626 Nodes.back().Selected = NoTokens;
|
|
627 Stack.push(&Nodes.back());
|
|
628 claimRange(Early, Nodes.back().Selected);
|
|
629 }
|
|
630
|
|
631 // Pops a node off the ancestor stack, and finalizes it. Pairs with push().
|
|
632 // Performs primary hit detection.
|
|
633 void pop() {
|
|
634 Node &N = *Stack.top();
|
|
635 dlog("{1}pop: {0}", printNodeToString(N.ASTNode, PrintPolicy), indent(-1));
|
|
636 claimRange(N.ASTNode.getSourceRange(), N.Selected);
|
|
637 if (N.Selected == NoTokens)
|
|
638 N.Selected = SelectionTree::Unselected;
|
|
639 if (N.Selected || !N.Children.empty()) {
|
|
640 // Attach to the tree.
|
|
641 N.Parent->Children.push_back(&N);
|
|
642 } else {
|
|
643 // Neither N any children are selected, it doesn't belong in the tree.
|
|
644 assert(&N == &Nodes.back());
|
|
645 Nodes.pop_back();
|
|
646 }
|
|
647 Stack.pop();
|
|
648 }
|
|
649
|
|
650 // Returns the range of tokens that this node will claim directly, and
|
|
651 // is not available to the node's children.
|
|
652 // Usually empty, but sometimes children cover tokens but shouldn't own them.
|
|
653 SourceRange earlySourceRange(const DynTypedNode &N) {
|
|
654 if (const Decl *D = N.get<Decl>()) {
|
173
|
655 // We want constructor name to be claimed by TypeLoc not the constructor
|
|
656 // itself. Similar for deduction guides, we rather want to select the
|
|
657 // underlying TypeLoc.
|
|
658 // FIXME: Unfortunately this doesn't work, even though RecursiveASTVisitor
|
|
659 // traverses the underlying TypeLoc inside DeclarationName, it is null for
|
|
660 // constructors.
|
|
661 if (isa<CXXConstructorDecl>(D) || isa<CXXDeductionGuideDecl>(D))
|
|
662 return SourceRange();
|
|
663 // This will capture Field, Function, MSProperty, NonTypeTemplateParm and
|
|
664 // VarDecls. We want the name in the declarator to be claimed by the decl
|
|
665 // and not by any children. For example:
|
150
|
666 // void [[foo]]();
|
|
667 // int (*[[s]])();
|
173
|
668 // struct X { int [[hash]] [32]; [[operator]] int();}
|
|
669 if (const auto *DD = llvm::dyn_cast<DeclaratorDecl>(D))
|
|
670 return DD->getLocation();
|
|
671 } else if (const auto *CCI = N.get<CXXCtorInitializer>()) {
|
150
|
672 // : [[b_]](42)
|
|
673 return CCI->getMemberLocation();
|
|
674 }
|
|
675 return SourceRange();
|
|
676 }
|
|
677
|
|
678 // Perform hit-testing of a complete Node against the selection.
|
|
679 // This runs for every node in the AST, and must be fast in common cases.
|
|
680 // This is usually called from pop(), so we can take children into account.
|
|
681 // The existing state of Result is relevant (early/late claims can interact).
|
|
682 void claimRange(SourceRange S, SelectionTree::Selection &Result) {
|
|
683 for (const auto &ClaimedRange :
|
|
684 UnclaimedExpandedTokens.erase(TokenBuf.expandedTokens(S)))
|
|
685 update(Result, SelChecker.test(ClaimedRange));
|
|
686
|
|
687 if (Result && Result != NoTokens)
|
|
688 dlog("{1}hit selection: {0}", S.printToString(SM), indent());
|
|
689 }
|
|
690
|
|
691 std::string indent(int Offset = 0) {
|
|
692 // Cast for signed arithmetic.
|
|
693 int Amount = int(Stack.size()) + Offset;
|
|
694 assert(Amount >= 0);
|
|
695 return std::string(Amount, ' ');
|
|
696 }
|
|
697
|
|
698 SourceManager &SM;
|
|
699 const LangOptions &LangOpts;
|
|
700 #ifndef NDEBUG
|
|
701 const PrintingPolicy &PrintPolicy;
|
|
702 #endif
|
|
703 const syntax::TokenBuffer &TokenBuf;
|
|
704 std::stack<Node *> Stack;
|
|
705 SelectionTester SelChecker;
|
|
706 IntervalSet<syntax::Token> UnclaimedExpandedTokens;
|
|
707 std::deque<Node> Nodes; // Stable pointers as we add more nodes.
|
|
708 };
|
|
709
|
|
710 } // namespace
|
|
711
|
173
|
712 llvm::SmallString<256> abbreviatedString(DynTypedNode N,
|
|
713 const PrintingPolicy &PP) {
|
|
714 llvm::SmallString<256> Result;
|
|
715 {
|
|
716 llvm::raw_svector_ostream OS(Result);
|
|
717 N.print(OS, PP);
|
|
718 }
|
|
719 auto Pos = Result.find('\n');
|
|
720 if (Pos != llvm::StringRef::npos) {
|
|
721 bool MoreText =
|
|
722 !llvm::all_of(llvm::StringRef(Result).drop_front(Pos), llvm::isSpace);
|
|
723 Result.resize(Pos);
|
|
724 if (MoreText)
|
|
725 Result.append(" …");
|
|
726 }
|
|
727 return Result;
|
|
728 }
|
|
729
|
150
|
730 void SelectionTree::print(llvm::raw_ostream &OS, const SelectionTree::Node &N,
|
|
731 int Indent) const {
|
|
732 if (N.Selected)
|
|
733 OS.indent(Indent - 1) << (N.Selected == SelectionTree::Complete ? '*'
|
|
734 : '.');
|
|
735 else
|
|
736 OS.indent(Indent);
|
|
737 printNodeKind(OS, N.ASTNode);
|
173
|
738 OS << ' ' << abbreviatedString(N.ASTNode, PrintPolicy) << "\n";
|
150
|
739 for (const Node *Child : N.Children)
|
|
740 print(OS, *Child, Indent + 2);
|
|
741 }
|
|
742
|
|
743 std::string SelectionTree::Node::kind() const {
|
|
744 std::string S;
|
|
745 llvm::raw_string_ostream OS(S);
|
|
746 printNodeKind(OS, ASTNode);
|
|
747 return std::move(OS.str());
|
|
748 }
|
|
749
|
173
|
750 // Decide which selections emulate a "point" query in between characters.
|
|
751 // If it's ambiguous (the neighboring characters are selectable tokens), returns
|
|
752 // both possibilities in preference order.
|
|
753 // Always returns at least one range - if no tokens touched, and empty range.
|
|
754 static llvm::SmallVector<std::pair<unsigned, unsigned>, 2>
|
|
755 pointBounds(unsigned Offset, const syntax::TokenBuffer &Tokens) {
|
|
756 const auto &SM = Tokens.sourceManager();
|
|
757 SourceLocation Loc = SM.getComposedLoc(SM.getMainFileID(), Offset);
|
|
758 llvm::SmallVector<std::pair<unsigned, unsigned>, 2> Result;
|
|
759 // Prefer right token over left.
|
|
760 for (const syntax::Token &Tok :
|
|
761 llvm::reverse(spelledTokensTouching(Loc, Tokens))) {
|
|
762 if (shouldIgnore(Tok))
|
|
763 continue;
|
|
764 unsigned Offset = Tokens.sourceManager().getFileOffset(Tok.location());
|
|
765 Result.emplace_back(Offset, Offset + Tok.length());
|
|
766 }
|
|
767 if (Result.empty())
|
|
768 Result.emplace_back(Offset, Offset);
|
|
769 return Result;
|
|
770 }
|
|
771
|
|
772 bool SelectionTree::createEach(ASTContext &AST,
|
|
773 const syntax::TokenBuffer &Tokens,
|
|
774 unsigned Begin, unsigned End,
|
|
775 llvm::function_ref<bool(SelectionTree)> Func) {
|
|
776 if (Begin != End)
|
|
777 return Func(SelectionTree(AST, Tokens, Begin, End));
|
|
778 for (std::pair<unsigned, unsigned> Bounds : pointBounds(Begin, Tokens))
|
|
779 if (Func(SelectionTree(AST, Tokens, Bounds.first, Bounds.second)))
|
|
780 return true;
|
|
781 return false;
|
|
782 }
|
|
783
|
|
784 SelectionTree SelectionTree::createRight(ASTContext &AST,
|
|
785 const syntax::TokenBuffer &Tokens,
|
|
786 unsigned int Begin, unsigned int End) {
|
|
787 llvm::Optional<SelectionTree> Result;
|
|
788 createEach(AST, Tokens, Begin, End, [&](SelectionTree T) {
|
|
789 Result = std::move(T);
|
|
790 return true;
|
|
791 });
|
|
792 return std::move(*Result);
|
150
|
793 }
|
|
794
|
|
795 SelectionTree::SelectionTree(ASTContext &AST, const syntax::TokenBuffer &Tokens,
|
|
796 unsigned Begin, unsigned End)
|
|
797 : PrintPolicy(AST.getLangOpts()) {
|
|
798 // No fundamental reason the selection needs to be in the main file,
|
|
799 // but that's all clangd has needed so far.
|
|
800 const SourceManager &SM = AST.getSourceManager();
|
|
801 FileID FID = SM.getMainFileID();
|
|
802 PrintPolicy.TerseOutput = true;
|
|
803 PrintPolicy.IncludeNewlines = false;
|
|
804
|
|
805 dlog("Computing selection for {0}",
|
|
806 SourceRange(SM.getComposedLoc(FID, Begin), SM.getComposedLoc(FID, End))
|
|
807 .printToString(SM));
|
|
808 Nodes = SelectionVisitor::collect(AST, Tokens, PrintPolicy, Begin, End, FID);
|
|
809 Root = Nodes.empty() ? nullptr : &Nodes.front();
|
173
|
810 recordMetrics(*this);
|
150
|
811 dlog("Built selection tree\n{0}", *this);
|
|
812 }
|
|
813
|
|
814 const Node *SelectionTree::commonAncestor() const {
|
|
815 const Node *Ancestor = Root;
|
|
816 while (Ancestor->Children.size() == 1 && !Ancestor->Selected)
|
|
817 Ancestor = Ancestor->Children.front();
|
|
818 // Returning nullptr here is a bit unprincipled, but it makes the API safer:
|
|
819 // the TranslationUnitDecl contains all of the preamble, so traversing it is a
|
|
820 // performance cliff. Callers can check for null and use root() if they want.
|
|
821 return Ancestor != Root ? Ancestor : nullptr;
|
|
822 }
|
|
823
|
173
|
824 const DeclContext &SelectionTree::Node::getDeclContext() const {
|
|
825 for (const Node *CurrentNode = this; CurrentNode != nullptr;
|
150
|
826 CurrentNode = CurrentNode->Parent) {
|
173
|
827 if (const Decl *Current = CurrentNode->ASTNode.get<Decl>()) {
|
150
|
828 if (CurrentNode != this)
|
|
829 if (auto *DC = dyn_cast<DeclContext>(Current))
|
|
830 return *DC;
|
|
831 return *Current->getDeclContext();
|
|
832 }
|
|
833 }
|
|
834 llvm_unreachable("A tree must always be rooted at TranslationUnitDecl.");
|
|
835 }
|
|
836
|
|
837 const SelectionTree::Node &SelectionTree::Node::ignoreImplicit() const {
|
|
838 if (Children.size() == 1 &&
|
|
839 Children.front()->ASTNode.getSourceRange() == ASTNode.getSourceRange())
|
|
840 return Children.front()->ignoreImplicit();
|
|
841 return *this;
|
|
842 }
|
|
843
|
|
844 const SelectionTree::Node &SelectionTree::Node::outerImplicit() const {
|
|
845 if (Parent && Parent->ASTNode.getSourceRange() == ASTNode.getSourceRange())
|
|
846 return Parent->outerImplicit();
|
|
847 return *this;
|
|
848 }
|
|
849
|
|
850 } // namespace clangd
|
|
851 } // namespace clang
|