150
|
1 //===--- Selection.cpp ----------------------------------------------------===//
|
|
2 //
|
|
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
4 // See https://llvm.org/LICENSE.txt for license information.
|
|
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
6 //
|
|
7 //===----------------------------------------------------------------------===//
|
|
8
|
|
9 #include "Selection.h"
|
|
10 #include "Logger.h"
|
|
11 #include "SourceCode.h"
|
|
12 #include "clang/AST/ASTTypeTraits.h"
|
|
13 #include "clang/AST/DeclCXX.h"
|
|
14 #include "clang/AST/Expr.h"
|
|
15 #include "clang/AST/ExprCXX.h"
|
|
16 #include "clang/AST/PrettyPrinter.h"
|
|
17 #include "clang/AST/RecursiveASTVisitor.h"
|
|
18 #include "clang/AST/TypeLoc.h"
|
|
19 #include "clang/Basic/OperatorKinds.h"
|
|
20 #include "clang/Basic/SourceLocation.h"
|
|
21 #include "clang/Basic/SourceManager.h"
|
|
22 #include "clang/Basic/TokenKinds.h"
|
|
23 #include "clang/Lex/Lexer.h"
|
|
24 #include "clang/Tooling/Syntax/Tokens.h"
|
|
25 #include "llvm/ADT/STLExtras.h"
|
|
26 #include "llvm/Support/Casting.h"
|
|
27 #include "llvm/Support/raw_ostream.h"
|
|
28 #include <algorithm>
|
|
29 #include <string>
|
|
30
|
|
31 namespace clang {
|
|
32 namespace clangd {
|
|
33 namespace {
|
|
34 using Node = SelectionTree::Node;
|
|
35 using ast_type_traits::DynTypedNode;
|
|
36
|
|
37 // An IntervalSet maintains a set of disjoint subranges of an array.
|
|
38 //
|
|
39 // Initially, it contains the entire array.
|
|
40 // [-----------------------------------------------------------]
|
|
41 //
|
|
42 // When a range is erased(), it will typically split the array in two.
|
|
43 // Claim: [--------------------]
|
|
44 // after: [----------------] [-------------------]
|
|
45 //
|
|
46 // erase() returns the segments actually erased. Given the state above:
|
|
47 // Claim: [---------------------------------------]
|
|
48 // Out: [---------] [------]
|
|
49 // After: [-----] [-----------]
|
|
50 //
|
|
51 // It is used to track (expanded) tokens not yet associated with an AST node.
|
|
52 // On traversing an AST node, its token range is erased from the unclaimed set.
|
|
53 // The tokens actually removed are associated with that node, and hit-tested
|
|
54 // against the selection to determine whether the node is selected.
|
|
55 template <typename T>
|
|
56 class IntervalSet {
|
|
57 public:
|
|
58 IntervalSet(llvm::ArrayRef<T> Range) { UnclaimedRanges.insert(Range); }
|
|
59
|
|
60 // Removes the elements of Claim from the set, modifying or removing ranges
|
|
61 // that overlap it.
|
|
62 // Returns the continuous subranges of Claim that were actually removed.
|
|
63 llvm::SmallVector<llvm::ArrayRef<T>, 4> erase(llvm::ArrayRef<T> Claim) {
|
|
64 llvm::SmallVector<llvm::ArrayRef<T>, 4> Out;
|
|
65 if (Claim.empty())
|
|
66 return Out;
|
|
67
|
|
68 // General case:
|
|
69 // Claim: [-----------------]
|
|
70 // UnclaimedRanges: [-A-] [-B-] [-C-] [-D-] [-E-] [-F-] [-G-]
|
|
71 // Overlap: ^first ^second
|
|
72 // Ranges C and D are fully included. Ranges B and E must be trimmed.
|
|
73 auto Overlap = std::make_pair(
|
|
74 UnclaimedRanges.lower_bound({Claim.begin(), Claim.begin()}), // C
|
|
75 UnclaimedRanges.lower_bound({Claim.end(), Claim.end()})); // F
|
|
76 // Rewind to cover B.
|
|
77 if (Overlap.first != UnclaimedRanges.begin()) {
|
|
78 --Overlap.first;
|
|
79 // ...unless B isn't selected at all.
|
|
80 if (Overlap.first->end() <= Claim.begin())
|
|
81 ++Overlap.first;
|
|
82 }
|
|
83 if (Overlap.first == Overlap.second)
|
|
84 return Out;
|
|
85
|
|
86 // First, copy all overlapping ranges into the output.
|
|
87 auto OutFirst = Out.insert(Out.end(), Overlap.first, Overlap.second);
|
|
88 // If any of the overlapping ranges were sliced by the claim, split them:
|
|
89 // - restrict the returned range to the claimed part
|
|
90 // - save the unclaimed part so it can be reinserted
|
|
91 llvm::ArrayRef<T> RemainingHead, RemainingTail;
|
|
92 if (Claim.begin() > OutFirst->begin()) {
|
|
93 RemainingHead = {OutFirst->begin(), Claim.begin()};
|
|
94 *OutFirst = {Claim.begin(), OutFirst->end()};
|
|
95 }
|
|
96 if (Claim.end() < Out.back().end()) {
|
|
97 RemainingTail = {Claim.end(), Out.back().end()};
|
|
98 Out.back() = {Out.back().begin(), Claim.end()};
|
|
99 }
|
|
100
|
|
101 // Erase all the overlapping ranges (invalidating all iterators).
|
|
102 UnclaimedRanges.erase(Overlap.first, Overlap.second);
|
|
103 // Reinsert ranges that were merely trimmed.
|
|
104 if (!RemainingHead.empty())
|
|
105 UnclaimedRanges.insert(RemainingHead);
|
|
106 if (!RemainingTail.empty())
|
|
107 UnclaimedRanges.insert(RemainingTail);
|
|
108
|
|
109 return Out;
|
|
110 }
|
|
111
|
|
112 private:
|
|
113 using TokenRange = llvm::ArrayRef<T>;
|
|
114 struct RangeLess {
|
|
115 bool operator()(llvm::ArrayRef<T> L, llvm::ArrayRef<T> R) const {
|
|
116 return L.begin() < R.begin();
|
|
117 }
|
|
118 };
|
|
119
|
|
120 // Disjoint sorted unclaimed ranges of expanded tokens.
|
|
121 std::set<llvm::ArrayRef<T>, RangeLess>
|
|
122 UnclaimedRanges;
|
|
123 };
|
|
124
|
|
125 // Sentinel value for the selectedness of a node where we've seen no tokens yet.
|
|
126 // This resolves to Unselected if no tokens are ever seen.
|
|
127 // But Unselected + Complete -> Partial, while NoTokens + Complete --> Complete.
|
|
128 // This value is never exposed publicly.
|
|
129 constexpr SelectionTree::Selection NoTokens =
|
|
130 static_cast<SelectionTree::Selection>(
|
|
131 static_cast<unsigned char>(SelectionTree::Complete + 1));
|
|
132
|
|
133 // Nodes start with NoTokens, and then use this function to aggregate the
|
|
134 // selectedness as more tokens are found.
|
|
135 void update(SelectionTree::Selection &Result, SelectionTree::Selection New) {
|
|
136 if (New == NoTokens)
|
|
137 return;
|
|
138 if (Result == NoTokens)
|
|
139 Result = New;
|
|
140 else if (Result != New)
|
|
141 // Can only be completely selected (or unselected) if all tokens are.
|
|
142 Result = SelectionTree::Partial;
|
|
143 }
|
|
144
|
|
145
|
|
146 // SelectionTester can determine whether a range of tokens from the PP-expanded
|
|
147 // stream (corresponding to an AST node) is considered selected.
|
|
148 //
|
|
149 // When the tokens result from macro expansions, the appropriate tokens in the
|
|
150 // main file are examined (macro invocation or args). Similarly for #includes.
|
|
151 //
|
|
152 // It tests each token in the range (not just the endpoints) as contiguous
|
|
153 // expanded tokens may not have contiguous spellings (with macros).
|
|
154 //
|
|
155 // Non-token text, and tokens not modeled in the AST (comments, semicolons)
|
|
156 // are ignored when determining selectedness.
|
|
157 class SelectionTester {
|
|
158 public:
|
|
159 // The selection is offsets [SelBegin, SelEnd) in SelFile.
|
|
160 SelectionTester(const syntax::TokenBuffer &Buf, FileID SelFile,
|
|
161 unsigned SelBegin, unsigned SelEnd, const SourceManager &SM)
|
|
162 : SelFile(SelFile), SM(SM) {
|
|
163 // Find all tokens (partially) selected in the file.
|
|
164 auto AllSpelledTokens = Buf.spelledTokens(SelFile);
|
|
165 const syntax::Token *SelFirst =
|
|
166 llvm::partition_point(AllSpelledTokens, [&](const syntax::Token &Tok) {
|
|
167 return SM.getFileOffset(Tok.endLocation()) <= SelBegin;
|
|
168 });
|
|
169 const syntax::Token *SelLimit = std::partition_point(
|
|
170 SelFirst, AllSpelledTokens.end(), [&](const syntax::Token &Tok) {
|
|
171 return SM.getFileOffset(Tok.location()) < SelEnd;
|
|
172 });
|
|
173 // Precompute selectedness and offset for selected spelled tokens.
|
|
174 for (const syntax::Token *T = SelFirst; T < SelLimit; ++T) {
|
|
175 // As well as comments, don't count semicolons as real tokens.
|
|
176 // They're not properly claimed as expr-statement is missing from the AST.
|
|
177 if (T->kind() == tok::comment || T->kind() == tok::semi)
|
|
178 continue;
|
|
179 SpelledTokens.emplace_back();
|
|
180 Tok &S = SpelledTokens.back();
|
|
181 S.Offset = SM.getFileOffset(T->location());
|
|
182 if (S.Offset >= SelBegin && S.Offset + T->length() <= SelEnd)
|
|
183 S.Selected = SelectionTree::Complete;
|
|
184 else
|
|
185 S.Selected = SelectionTree::Partial;
|
|
186 }
|
|
187 }
|
|
188
|
|
189 // Test whether a consecutive range of tokens is selected.
|
|
190 // The tokens are taken from the expanded token stream.
|
|
191 SelectionTree::Selection
|
|
192 test(llvm::ArrayRef<syntax::Token> ExpandedTokens) const {
|
|
193 if (SpelledTokens.empty())
|
|
194 return NoTokens;
|
|
195 SelectionTree::Selection Result = NoTokens;
|
|
196 while (!ExpandedTokens.empty()) {
|
|
197 // Take consecutive tokens from the same context together for efficiency.
|
|
198 FileID FID = SM.getFileID(ExpandedTokens.front().location());
|
|
199 auto Batch = ExpandedTokens.take_while([&](const syntax::Token &T) {
|
|
200 return SM.getFileID(T.location()) == FID;
|
|
201 });
|
|
202 assert(!Batch.empty());
|
|
203 ExpandedTokens = ExpandedTokens.drop_front(Batch.size());
|
|
204
|
|
205 update(Result, testChunk(FID, Batch));
|
|
206 }
|
|
207 return Result;
|
|
208 }
|
|
209
|
|
210 // Cheap check whether any of the tokens in R might be selected.
|
|
211 // If it returns false, test() will return NoTokens or Unselected.
|
|
212 // If it returns true, test() may return any value.
|
|
213 bool mayHit(SourceRange R) const {
|
|
214 if (SpelledTokens.empty())
|
|
215 return false;
|
|
216 auto B = SM.getDecomposedLoc(R.getBegin());
|
|
217 auto E = SM.getDecomposedLoc(R.getEnd());
|
|
218 if (B.first == SelFile && E.first == SelFile)
|
|
219 if (E.second < SpelledTokens.front().Offset ||
|
|
220 B.second > SpelledTokens.back().Offset)
|
|
221 return false;
|
|
222 return true;
|
|
223 }
|
|
224
|
|
225 private:
|
|
226 // Hit-test a consecutive range of tokens from a single file ID.
|
|
227 SelectionTree::Selection
|
|
228 testChunk(FileID FID, llvm::ArrayRef<syntax::Token> Batch) const {
|
|
229 assert(!Batch.empty());
|
|
230 SourceLocation StartLoc = Batch.front().location();
|
|
231 // There are several possible categories of FileID depending on how the
|
|
232 // preprocessor was used to generate these tokens:
|
|
233 // main file, #included file, macro args, macro bodies.
|
|
234 // We need to identify the main-file tokens that represent Batch, and
|
|
235 // determine whether we want to exclusively claim them. Regular tokens
|
|
236 // represent one AST construct, but a macro invocation can represent many.
|
|
237
|
|
238 // Handle tokens written directly in the main file.
|
|
239 if (FID == SelFile) {
|
|
240 return testTokenRange(SM.getFileOffset(Batch.front().location()),
|
|
241 SM.getFileOffset(Batch.back().location()));
|
|
242 }
|
|
243
|
|
244 // Handle tokens in another file #included into the main file.
|
|
245 // Check if the #include is selected, but don't claim it exclusively.
|
|
246 if (StartLoc.isFileID()) {
|
|
247 for (SourceLocation Loc = Batch.front().location(); Loc.isValid();
|
|
248 Loc = SM.getIncludeLoc(SM.getFileID(Loc))) {
|
|
249 if (SM.getFileID(Loc) == SelFile)
|
|
250 // FIXME: use whole #include directive, not just the filename string.
|
|
251 return testToken(SM.getFileOffset(Loc));
|
|
252 }
|
|
253 return NoTokens;
|
|
254 }
|
|
255
|
|
256 assert(StartLoc.isMacroID());
|
|
257 // Handle tokens that were passed as a macro argument.
|
|
258 SourceLocation ArgStart = SM.getTopMacroCallerLoc(StartLoc);
|
|
259 if (SM.getFileID(ArgStart) == SelFile) {
|
|
260 SourceLocation ArgEnd = SM.getTopMacroCallerLoc(Batch.back().location());
|
|
261 return testTokenRange(SM.getFileOffset(ArgStart),
|
|
262 SM.getFileOffset(ArgEnd));
|
|
263 }
|
|
264
|
|
265 // Handle tokens produced by non-argument macro expansion.
|
|
266 // Check if the macro name is selected, don't claim it exclusively.
|
|
267 auto Expansion = SM.getDecomposedExpansionLoc(StartLoc);
|
|
268 if (Expansion.first == SelFile)
|
|
269 // FIXME: also check ( and ) for function-like macros?
|
|
270 return testToken(Expansion.second);
|
|
271 else
|
|
272 return NoTokens;
|
|
273 }
|
|
274
|
|
275 // Is the closed token range [Begin, End] selected?
|
|
276 SelectionTree::Selection testTokenRange(unsigned Begin, unsigned End) const {
|
|
277 assert(Begin <= End);
|
|
278 // Outside the selection entirely?
|
|
279 if (End < SpelledTokens.front().Offset ||
|
|
280 Begin > SpelledTokens.back().Offset)
|
|
281 return SelectionTree::Unselected;
|
|
282
|
|
283 // Compute range of tokens.
|
|
284 auto B = llvm::partition_point(
|
|
285 SpelledTokens, [&](const Tok &T) { return T.Offset < Begin; });
|
|
286 auto E = std::partition_point(
|
|
287 B, SpelledTokens.end(), [&](const Tok &T) { return T.Offset <= End; });
|
|
288
|
|
289 // Aggregate selectedness of tokens in range.
|
|
290 bool ExtendsOutsideSelection = Begin < SpelledTokens.front().Offset ||
|
|
291 End > SpelledTokens.back().Offset;
|
|
292 SelectionTree::Selection Result =
|
|
293 ExtendsOutsideSelection ? SelectionTree::Unselected : NoTokens;
|
|
294 for (auto It = B; It != E; ++It)
|
|
295 update(Result, It->Selected);
|
|
296 return Result;
|
|
297 }
|
|
298
|
|
299 // Is the token at `Offset` selected?
|
|
300 SelectionTree::Selection testToken(unsigned Offset) const {
|
|
301 // Outside the selection entirely?
|
|
302 if (Offset < SpelledTokens.front().Offset ||
|
|
303 Offset > SpelledTokens.back().Offset)
|
|
304 return SelectionTree::Unselected;
|
|
305 // Find the token, if it exists.
|
|
306 auto It = llvm::partition_point(
|
|
307 SpelledTokens, [&](const Tok &T) { return T.Offset < Offset; });
|
|
308 if (It != SpelledTokens.end() && It->Offset == Offset)
|
|
309 return It->Selected;
|
|
310 return NoTokens;
|
|
311 }
|
|
312
|
|
313 struct Tok {
|
|
314 unsigned Offset;
|
|
315 SelectionTree::Selection Selected;
|
|
316 };
|
|
317 std::vector<Tok> SpelledTokens;
|
|
318 FileID SelFile;
|
|
319 const SourceManager &SM;
|
|
320 };
|
|
321
|
|
322 // Show the type of a node for debugging.
|
|
323 void printNodeKind(llvm::raw_ostream &OS, const DynTypedNode &N) {
|
|
324 if (const TypeLoc *TL = N.get<TypeLoc>()) {
|
|
325 // TypeLoc is a hierarchy, but has only a single ASTNodeKind.
|
|
326 // Synthesize the name from the Type subclass (except for QualifiedTypeLoc).
|
|
327 if (TL->getTypeLocClass() == TypeLoc::Qualified)
|
|
328 OS << "QualifiedTypeLoc";
|
|
329 else
|
|
330 OS << TL->getType()->getTypeClassName() << "TypeLoc";
|
|
331 } else {
|
|
332 OS << N.getNodeKind().asStringRef();
|
|
333 }
|
|
334 }
|
|
335
|
|
336 #ifndef NDEBUG
|
|
337 std::string printNodeToString(const DynTypedNode &N, const PrintingPolicy &PP) {
|
|
338 std::string S;
|
|
339 llvm::raw_string_ostream OS(S);
|
|
340 printNodeKind(OS, N);
|
|
341 OS << " ";
|
|
342 return std::move(OS.str());
|
|
343 }
|
|
344 #endif
|
|
345
|
|
346 bool isImplicit(const Stmt* S) {
|
|
347 // Some Stmts are implicit and shouldn't be traversed, but there's no
|
|
348 // "implicit" attribute on Stmt/Expr.
|
|
349 // Unwrap implicit casts first if present (other nodes too?).
|
|
350 if (auto *ICE = llvm::dyn_cast<ImplicitCastExpr>(S))
|
|
351 S = ICE->getSubExprAsWritten();
|
|
352 // Implicit this in a MemberExpr is not filtered out by RecursiveASTVisitor.
|
|
353 // It would be nice if RAV handled this (!shouldTraverseImplicitCode()).
|
|
354 if (auto *CTI = llvm::dyn_cast<CXXThisExpr>(S))
|
|
355 if (CTI->isImplicit())
|
|
356 return true;
|
|
357 // Refs to operator() and [] are (almost?) always implicit as part of calls.
|
|
358 if (auto *DRE = llvm::dyn_cast<DeclRefExpr>(S)) {
|
|
359 if (auto *FD = llvm::dyn_cast<FunctionDecl>(DRE->getDecl())) {
|
|
360 switch (FD->getOverloadedOperator()) {
|
|
361 case OO_Call:
|
|
362 case OO_Subscript:
|
|
363 return true;
|
|
364 default:
|
|
365 break;
|
|
366 }
|
|
367 }
|
|
368 }
|
|
369 return false;
|
|
370 }
|
|
371
|
|
372 // We find the selection by visiting written nodes in the AST, looking for nodes
|
|
373 // that intersect with the selected character range.
|
|
374 //
|
|
375 // While traversing, we maintain a parent stack. As nodes pop off the stack,
|
|
376 // we decide whether to keep them or not. To be kept, they must either be
|
|
377 // selected or contain some nodes that are.
|
|
378 //
|
|
379 // For simple cases (not inside macros) we prune subtrees that don't intersect.
|
|
380 class SelectionVisitor : public RecursiveASTVisitor<SelectionVisitor> {
|
|
381 public:
|
|
382 // Runs the visitor to gather selected nodes and their ancestors.
|
|
383 // If there is any selection, the root (TUDecl) is the first node.
|
|
384 static std::deque<Node> collect(ASTContext &AST,
|
|
385 const syntax::TokenBuffer &Tokens,
|
|
386 const PrintingPolicy &PP, unsigned Begin,
|
|
387 unsigned End, FileID File) {
|
|
388 SelectionVisitor V(AST, Tokens, PP, Begin, End, File);
|
|
389 V.TraverseAST(AST);
|
|
390 assert(V.Stack.size() == 1 && "Unpaired push/pop?");
|
|
391 assert(V.Stack.top() == &V.Nodes.front());
|
|
392 return std::move(V.Nodes);
|
|
393 }
|
|
394
|
|
395 // We traverse all "well-behaved" nodes the same way:
|
|
396 // - push the node onto the stack
|
|
397 // - traverse its children recursively
|
|
398 // - pop it from the stack
|
|
399 // - hit testing: is intersection(node, selection) - union(children) empty?
|
|
400 // - attach it to the tree if it or any children hit the selection
|
|
401 //
|
|
402 // Two categories of nodes are not "well-behaved":
|
|
403 // - those without source range information, we don't record those
|
|
404 // - those that can't be stored in DynTypedNode.
|
|
405 // We're missing some interesting things like Attr due to the latter.
|
|
406 bool TraverseDecl(Decl *X) {
|
|
407 if (X && isa<TranslationUnitDecl>(X))
|
|
408 return Base::TraverseDecl(X); // Already pushed by constructor.
|
|
409 // Base::TraverseDecl will suppress children, but not this node itself.
|
|
410 if (X && X->isImplicit())
|
|
411 return true;
|
|
412 return traverseNode(X, [&] { return Base::TraverseDecl(X); });
|
|
413 }
|
|
414 bool TraverseTypeLoc(TypeLoc X) {
|
|
415 return traverseNode(&X, [&] { return Base::TraverseTypeLoc(X); });
|
|
416 }
|
|
417 bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc X) {
|
|
418 return traverseNode(
|
|
419 &X, [&] { return Base::TraverseNestedNameSpecifierLoc(X); });
|
|
420 }
|
|
421 bool TraverseConstructorInitializer(CXXCtorInitializer *X) {
|
|
422 return traverseNode(
|
|
423 X, [&] { return Base::TraverseConstructorInitializer(X); });
|
|
424 }
|
|
425 // Stmt is the same, but this form allows the data recursion optimization.
|
|
426 bool dataTraverseStmtPre(Stmt *X) {
|
|
427 if (!X || isImplicit(X))
|
|
428 return false;
|
|
429 auto N = DynTypedNode::create(*X);
|
|
430 if (canSafelySkipNode(N))
|
|
431 return false;
|
|
432 push(std::move(N));
|
|
433 if (shouldSkipChildren(X)) {
|
|
434 pop();
|
|
435 return false;
|
|
436 }
|
|
437 return true;
|
|
438 }
|
|
439 bool dataTraverseStmtPost(Stmt *X) {
|
|
440 pop();
|
|
441 return true;
|
|
442 }
|
|
443 // QualifiedTypeLoc is handled strangely in RecursiveASTVisitor: the derived
|
|
444 // TraverseTypeLoc is not called for the inner UnqualTypeLoc.
|
|
445 // This means we'd never see 'int' in 'const int'! Work around that here.
|
|
446 // (The reason for the behavior is to avoid traversing the nested Type twice,
|
|
447 // but we ignore TraverseType anyway).
|
|
448 bool TraverseQualifiedTypeLoc(QualifiedTypeLoc QX) {
|
|
449 return traverseNode<TypeLoc>(
|
|
450 &QX, [&] { return TraverseTypeLoc(QX.getUnqualifiedLoc()); });
|
|
451 }
|
|
452 // Uninteresting parts of the AST that don't have locations within them.
|
|
453 bool TraverseNestedNameSpecifier(NestedNameSpecifier *) { return true; }
|
|
454 bool TraverseType(QualType) { return true; }
|
|
455
|
|
456 // The DeclStmt for the loop variable claims to cover the whole range
|
|
457 // inside the parens, this causes the range-init expression to not be hit.
|
|
458 // Traverse the loop VarDecl instead, which has the right source range.
|
|
459 bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) {
|
|
460 return traverseNode(S, [&] {
|
|
461 return TraverseStmt(S->getInit()) && TraverseDecl(S->getLoopVariable()) &&
|
|
462 TraverseStmt(S->getRangeInit()) && TraverseStmt(S->getBody());
|
|
463 });
|
|
464 }
|
|
465 // OpaqueValueExpr blocks traversal, we must explicitly traverse it.
|
|
466 bool TraverseOpaqueValueExpr(OpaqueValueExpr *E) {
|
|
467 return traverseNode(E, [&] { return TraverseStmt(E->getSourceExpr()); });
|
|
468 }
|
|
469 // We only want to traverse the *syntactic form* to understand the selection.
|
|
470 bool TraversePseudoObjectExpr(PseudoObjectExpr *E) {
|
|
471 return traverseNode(E, [&] { return TraverseStmt(E->getSyntacticForm()); });
|
|
472 }
|
|
473
|
|
474 private:
|
|
475 using Base = RecursiveASTVisitor<SelectionVisitor>;
|
|
476
|
|
477 SelectionVisitor(ASTContext &AST, const syntax::TokenBuffer &Tokens,
|
|
478 const PrintingPolicy &PP, unsigned SelBegin, unsigned SelEnd,
|
|
479 FileID SelFile)
|
|
480 : SM(AST.getSourceManager()), LangOpts(AST.getLangOpts()),
|
|
481 #ifndef NDEBUG
|
|
482 PrintPolicy(PP),
|
|
483 #endif
|
|
484 TokenBuf(Tokens), SelChecker(Tokens, SelFile, SelBegin, SelEnd, SM),
|
|
485 UnclaimedExpandedTokens(Tokens.expandedTokens()) {
|
|
486 // Ensure we have a node for the TU decl, regardless of traversal scope.
|
|
487 Nodes.emplace_back();
|
|
488 Nodes.back().ASTNode = DynTypedNode::create(*AST.getTranslationUnitDecl());
|
|
489 Nodes.back().Parent = nullptr;
|
|
490 Nodes.back().Selected = SelectionTree::Unselected;
|
|
491 Stack.push(&Nodes.back());
|
|
492 }
|
|
493
|
|
494 // Generic case of TraverseFoo. Func should be the call to Base::TraverseFoo.
|
|
495 // Node is always a pointer so the generic code can handle any null checks.
|
|
496 template <typename T, typename Func>
|
|
497 bool traverseNode(T *Node, const Func &Body) {
|
|
498 if (Node == nullptr)
|
|
499 return true;
|
|
500 auto N = DynTypedNode::create(*Node);
|
|
501 if (canSafelySkipNode(N))
|
|
502 return true;
|
|
503 push(DynTypedNode::create(*Node));
|
|
504 bool Ret = Body();
|
|
505 pop();
|
|
506 return Ret;
|
|
507 }
|
|
508
|
|
509 // HIT TESTING
|
|
510 //
|
|
511 // We do rough hit testing on the way down the tree to avoid traversing
|
|
512 // subtrees that don't touch the selection (canSafelySkipNode), but
|
|
513 // fine-grained hit-testing is mostly done on the way back up (in pop()).
|
|
514 // This means children get to claim parts of the selection first, and parents
|
|
515 // are only selected if they own tokens that no child owned.
|
|
516 //
|
|
517 // Nodes *usually* nest nicely: a child's getSourceRange() lies within the
|
|
518 // parent's, and a node (transitively) owns all tokens in its range.
|
|
519 //
|
|
520 // Exception 1: child range claims tokens that should be owned by the parent.
|
|
521 // e.g. in `void foo(int);`, the FunctionTypeLoc should own
|
|
522 // `void (int)` but the parent FunctionDecl should own `foo`.
|
|
523 // To handle this case, certain nodes claim small token ranges *before*
|
|
524 // their children are traversed. (see earlySourceRange).
|
|
525 //
|
|
526 // Exception 2: siblings both claim the same node.
|
|
527 // e.g. `int x, y;` produces two sibling VarDecls.
|
|
528 // ~~~~~ x
|
|
529 // ~~~~~~~~ y
|
|
530 // Here the first ("leftmost") sibling claims the tokens it wants, and the
|
|
531 // other sibling gets what's left. So selecting "int" only includes the left
|
|
532 // VarDecl in the selection tree.
|
|
533
|
|
534 // An optimization for a common case: nodes outside macro expansions that
|
|
535 // don't intersect the selection may be recursively skipped.
|
|
536 bool canSafelySkipNode(const DynTypedNode &N) {
|
|
537 SourceRange S = N.getSourceRange();
|
|
538 if (auto *TL = N.get<TypeLoc>()) {
|
|
539 // DeclTypeTypeLoc::getSourceRange() is incomplete, which would lead to
|
|
540 // failing
|
|
541 // to descend into the child expression.
|
|
542 // decltype(2+2);
|
|
543 // ~~~~~~~~~~~~~ <-- correct range
|
|
544 // ~~~~~~~~ <-- range reported by getSourceRange()
|
|
545 // ~~~~~~~~~~~~ <-- range with this hack(i.e, missing closing paren)
|
|
546 // FIXME: Alter DecltypeTypeLoc to contain parentheses locations and get
|
|
547 // rid of this patch.
|
|
548 if (auto DT = TL->getAs<DecltypeTypeLoc>())
|
|
549 S.setEnd(DT.getUnderlyingExpr()->getEndLoc());
|
|
550 }
|
|
551 if (!SelChecker.mayHit(S)) {
|
|
552 dlog("{1}skip: {0}", printNodeToString(N, PrintPolicy), indent());
|
|
553 dlog("{1}skipped range = {0}", S.printToString(SM), indent(1));
|
|
554 return true;
|
|
555 }
|
|
556 return false;
|
|
557 }
|
|
558
|
|
559 // There are certain nodes we want to treat as leaves in the SelectionTree,
|
|
560 // although they do have children.
|
|
561 bool shouldSkipChildren(const Stmt *X) const {
|
|
562 // UserDefinedLiteral (e.g. 12_i) has two children (12 and _i).
|
|
563 // Unfortunately TokenBuffer sees 12_i as one token and can't split it.
|
|
564 // So we treat UserDefinedLiteral as a leaf node, owning the token.
|
|
565 return llvm::isa<UserDefinedLiteral>(X);
|
|
566 }
|
|
567
|
|
568 // Pushes a node onto the ancestor stack. Pairs with pop().
|
|
569 // Performs early hit detection for some nodes (on the earlySourceRange).
|
|
570 void push(DynTypedNode Node) {
|
|
571 SourceRange Early = earlySourceRange(Node);
|
|
572 dlog("{1}push: {0}", printNodeToString(Node, PrintPolicy), indent());
|
|
573 Nodes.emplace_back();
|
|
574 Nodes.back().ASTNode = std::move(Node);
|
|
575 Nodes.back().Parent = Stack.top();
|
|
576 Nodes.back().Selected = NoTokens;
|
|
577 Stack.push(&Nodes.back());
|
|
578 claimRange(Early, Nodes.back().Selected);
|
|
579 }
|
|
580
|
|
581 // Pops a node off the ancestor stack, and finalizes it. Pairs with push().
|
|
582 // Performs primary hit detection.
|
|
583 void pop() {
|
|
584 Node &N = *Stack.top();
|
|
585 dlog("{1}pop: {0}", printNodeToString(N.ASTNode, PrintPolicy), indent(-1));
|
|
586 claimRange(N.ASTNode.getSourceRange(), N.Selected);
|
|
587 if (N.Selected == NoTokens)
|
|
588 N.Selected = SelectionTree::Unselected;
|
|
589 if (N.Selected || !N.Children.empty()) {
|
|
590 // Attach to the tree.
|
|
591 N.Parent->Children.push_back(&N);
|
|
592 } else {
|
|
593 // Neither N any children are selected, it doesn't belong in the tree.
|
|
594 assert(&N == &Nodes.back());
|
|
595 Nodes.pop_back();
|
|
596 }
|
|
597 Stack.pop();
|
|
598 }
|
|
599
|
|
600 // Returns the range of tokens that this node will claim directly, and
|
|
601 // is not available to the node's children.
|
|
602 // Usually empty, but sometimes children cover tokens but shouldn't own them.
|
|
603 SourceRange earlySourceRange(const DynTypedNode &N) {
|
|
604 if (const Decl *D = N.get<Decl>()) {
|
|
605 // void [[foo]]();
|
|
606 if (auto *FD = llvm::dyn_cast<FunctionDecl>(D))
|
|
607 return FD->getNameInfo().getSourceRange();
|
|
608 // int (*[[s]])();
|
|
609 else if (auto *VD = llvm::dyn_cast<VarDecl>(D))
|
|
610 return VD->getLocation();
|
|
611 } else if (const auto* CCI = N.get<CXXCtorInitializer>()) {
|
|
612 // : [[b_]](42)
|
|
613 return CCI->getMemberLocation();
|
|
614 }
|
|
615 return SourceRange();
|
|
616 }
|
|
617
|
|
618 // Perform hit-testing of a complete Node against the selection.
|
|
619 // This runs for every node in the AST, and must be fast in common cases.
|
|
620 // This is usually called from pop(), so we can take children into account.
|
|
621 // The existing state of Result is relevant (early/late claims can interact).
|
|
622 void claimRange(SourceRange S, SelectionTree::Selection &Result) {
|
|
623 for (const auto &ClaimedRange :
|
|
624 UnclaimedExpandedTokens.erase(TokenBuf.expandedTokens(S)))
|
|
625 update(Result, SelChecker.test(ClaimedRange));
|
|
626
|
|
627 if (Result && Result != NoTokens)
|
|
628 dlog("{1}hit selection: {0}", S.printToString(SM), indent());
|
|
629 }
|
|
630
|
|
631 std::string indent(int Offset = 0) {
|
|
632 // Cast for signed arithmetic.
|
|
633 int Amount = int(Stack.size()) + Offset;
|
|
634 assert(Amount >= 0);
|
|
635 return std::string(Amount, ' ');
|
|
636 }
|
|
637
|
|
638 SourceManager &SM;
|
|
639 const LangOptions &LangOpts;
|
|
640 #ifndef NDEBUG
|
|
641 const PrintingPolicy &PrintPolicy;
|
|
642 #endif
|
|
643 const syntax::TokenBuffer &TokenBuf;
|
|
644 std::stack<Node *> Stack;
|
|
645 SelectionTester SelChecker;
|
|
646 IntervalSet<syntax::Token> UnclaimedExpandedTokens;
|
|
647 std::deque<Node> Nodes; // Stable pointers as we add more nodes.
|
|
648 };
|
|
649
|
|
650 } // namespace
|
|
651
|
|
652 void SelectionTree::print(llvm::raw_ostream &OS, const SelectionTree::Node &N,
|
|
653 int Indent) const {
|
|
654 if (N.Selected)
|
|
655 OS.indent(Indent - 1) << (N.Selected == SelectionTree::Complete ? '*'
|
|
656 : '.');
|
|
657 else
|
|
658 OS.indent(Indent);
|
|
659 printNodeKind(OS, N.ASTNode);
|
|
660 OS << ' ';
|
|
661 N.ASTNode.print(OS, PrintPolicy);
|
|
662 OS << "\n";
|
|
663 for (const Node *Child : N.Children)
|
|
664 print(OS, *Child, Indent + 2);
|
|
665 }
|
|
666
|
|
667 std::string SelectionTree::Node::kind() const {
|
|
668 std::string S;
|
|
669 llvm::raw_string_ostream OS(S);
|
|
670 printNodeKind(OS, ASTNode);
|
|
671 return std::move(OS.str());
|
|
672 }
|
|
673
|
|
674 // Decide which selection emulates a "point" query in between characters.
|
|
675 static std::pair<unsigned, unsigned> pointBounds(unsigned Offset, FileID FID,
|
|
676 ASTContext &AST) {
|
|
677 StringRef Buf = AST.getSourceManager().getBufferData(FID);
|
|
678 // Edge-cases where the choice is forced.
|
|
679 if (Buf.size() == 0)
|
|
680 return {0, 0};
|
|
681 if (Offset == 0)
|
|
682 return {0, 1};
|
|
683 if (Offset == Buf.size())
|
|
684 return {Offset - 1, Offset};
|
|
685 // We could choose either this byte or the previous. Usually we prefer the
|
|
686 // character on the right of the cursor (or under a block cursor).
|
|
687 // But if that's whitespace/semicolon, we likely want the token on the left.
|
|
688 auto IsIgnoredChar = [](char C) { return isWhitespace(C) || C == ';'; };
|
|
689 if (IsIgnoredChar(Buf[Offset]) && !IsIgnoredChar(Buf[Offset - 1]))
|
|
690 return {Offset - 1, Offset};
|
|
691 return {Offset, Offset + 1};
|
|
692 }
|
|
693
|
|
694 SelectionTree::SelectionTree(ASTContext &AST, const syntax::TokenBuffer &Tokens,
|
|
695 unsigned Begin, unsigned End)
|
|
696 : PrintPolicy(AST.getLangOpts()) {
|
|
697 // No fundamental reason the selection needs to be in the main file,
|
|
698 // but that's all clangd has needed so far.
|
|
699 const SourceManager &SM = AST.getSourceManager();
|
|
700 FileID FID = SM.getMainFileID();
|
|
701 if (Begin == End)
|
|
702 std::tie(Begin, End) = pointBounds(Begin, FID, AST);
|
|
703 PrintPolicy.TerseOutput = true;
|
|
704 PrintPolicy.IncludeNewlines = false;
|
|
705
|
|
706 dlog("Computing selection for {0}",
|
|
707 SourceRange(SM.getComposedLoc(FID, Begin), SM.getComposedLoc(FID, End))
|
|
708 .printToString(SM));
|
|
709 Nodes = SelectionVisitor::collect(AST, Tokens, PrintPolicy, Begin, End, FID);
|
|
710 Root = Nodes.empty() ? nullptr : &Nodes.front();
|
|
711 dlog("Built selection tree\n{0}", *this);
|
|
712 }
|
|
713
|
|
714 SelectionTree::SelectionTree(ASTContext &AST, const syntax::TokenBuffer &Tokens,
|
|
715 unsigned Offset)
|
|
716 : SelectionTree(AST, Tokens, Offset, Offset) {}
|
|
717
|
|
718 const Node *SelectionTree::commonAncestor() const {
|
|
719 const Node *Ancestor = Root;
|
|
720 while (Ancestor->Children.size() == 1 && !Ancestor->Selected)
|
|
721 Ancestor = Ancestor->Children.front();
|
|
722 // Returning nullptr here is a bit unprincipled, but it makes the API safer:
|
|
723 // the TranslationUnitDecl contains all of the preamble, so traversing it is a
|
|
724 // performance cliff. Callers can check for null and use root() if they want.
|
|
725 return Ancestor != Root ? Ancestor : nullptr;
|
|
726 }
|
|
727
|
|
728 const DeclContext& SelectionTree::Node::getDeclContext() const {
|
|
729 for (const Node* CurrentNode = this; CurrentNode != nullptr;
|
|
730 CurrentNode = CurrentNode->Parent) {
|
|
731 if (const Decl* Current = CurrentNode->ASTNode.get<Decl>()) {
|
|
732 if (CurrentNode != this)
|
|
733 if (auto *DC = dyn_cast<DeclContext>(Current))
|
|
734 return *DC;
|
|
735 return *Current->getDeclContext();
|
|
736 }
|
|
737 }
|
|
738 llvm_unreachable("A tree must always be rooted at TranslationUnitDecl.");
|
|
739 }
|
|
740
|
|
741 const SelectionTree::Node &SelectionTree::Node::ignoreImplicit() const {
|
|
742 if (Children.size() == 1 &&
|
|
743 Children.front()->ASTNode.getSourceRange() == ASTNode.getSourceRange())
|
|
744 return Children.front()->ignoreImplicit();
|
|
745 return *this;
|
|
746 }
|
|
747
|
|
748 const SelectionTree::Node &SelectionTree::Node::outerImplicit() const {
|
|
749 if (Parent && Parent->ASTNode.getSourceRange() == ASTNode.getSourceRange())
|
|
750 return Parent->outerImplicit();
|
|
751 return *this;
|
|
752 }
|
|
753
|
|
754 } // namespace clangd
|
|
755 } // namespace clang
|