//===- Tokens.cpp - collect tokens from preprocessing ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Tooling/Syntax/Tokens.h"

#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/Token.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <string>
#include <utility>
#include <vector>

using namespace clang;
using namespace clang::syntax;

namespace {
// Finds the smallest consecutive subsequence of Toks that covers R.
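// E.g., for the token stream `int a = 10 ;` and a range covering `a = 10`,
// this returns the {`a`, `=`, `10`} subrange. Both bounds are found with
// binary search (llvm::partition_point), so Toks must be sorted by location
// in translation-unit order.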
llvm::ArrayRef<syntax::Token>
getTokensCovering(llvm::ArrayRef<syntax::Token> Toks, SourceRange R,
                  const SourceManager &SM) {
  if (R.isInvalid())
    return {};
  const syntax::Token *Begin =
      llvm::partition_point(Toks, [&](const syntax::Token &T) {
        return SM.isBeforeInTranslationUnit(T.location(), R.getBegin());
      });
  const syntax::Token *End =
      llvm::partition_point(Toks, [&](const syntax::Token &T) {
        return !SM.isBeforeInTranslationUnit(R.getEnd(), T.location());
      });
  if (Begin > End)
    return {};
  return {Begin, End};
}

// Finds the smallest expansion range that contains expanded tokens First and
// Last, e.g.:
// #define ID(x) x
// ID(ID(ID(a1) a2))
//          ~~        -> a1
//              ~~    -> a2
//       ~~~~~~~~~    -> a1 a2
SourceRange findCommonRangeForMacroArgs(const syntax::Token &First,
                                        const syntax::Token &Last,
                                        const SourceManager &SM) {
  SourceRange Res;
  auto FirstLoc = First.location(), LastLoc = Last.location();
  // Keep traversing up the spelling chain as long as tokens are part of the
  // same expansion.
  while (!FirstLoc.isFileID() && !LastLoc.isFileID()) {
    auto ExpInfoFirst = SM.getSLocEntry(SM.getFileID(FirstLoc)).getExpansion();
    auto ExpInfoLast = SM.getSLocEntry(SM.getFileID(LastLoc)).getExpansion();
    // Stop if expansions have diverged.
    if (ExpInfoFirst.getExpansionLocStart() !=
        ExpInfoLast.getExpansionLocStart())
      break;
    // Do not continue into macro bodies.
    if (!ExpInfoFirst.isMacroArgExpansion() ||
        !ExpInfoLast.isMacroArgExpansion())
      break;
    FirstLoc = SM.getImmediateSpellingLoc(FirstLoc);
    LastLoc = SM.getImmediateSpellingLoc(LastLoc);
    // Update the result afterwards, as we want the tokens that triggered the
    // expansion.
    Res = {FirstLoc, LastLoc};
  }
  // Normally, mapping back to the expansion location here only changes the
  // FileID, as we've already found some tokens expanded from the same macro
  // argument, and they should map to a consecutive subset of spelled tokens.
  // Unfortunately, SourceManager::isBeforeInTranslationUnit discriminates
  // source locations based on their FileID in addition to offsets. So even
  // though we are referring to the same tokens, SourceManager might tell us
  // that one is before the other if they have different FileIDs.
  return SM.getExpansionRange(CharSourceRange(Res, true)).getAsRange();
}

} // namespace

syntax::Token::Token(SourceLocation Location, unsigned Length,
                     tok::TokenKind Kind)
    : Location(Location), Length(Length), Kind(Kind) {
  assert(Location.isValid());
}

syntax::Token::Token(const clang::Token &T)
    : Token(T.getLocation(), T.getLength(), T.getKind()) {
  assert(!T.isAnnotation());
}

llvm::StringRef syntax::Token::text(const SourceManager &SM) const {
  bool Invalid = false;
  const char *Start = SM.getCharacterData(location(), &Invalid);
  assert(!Invalid);
  return llvm::StringRef(Start, length());
}

FileRange syntax::Token::range(const SourceManager &SM) const {
  assert(location().isFileID() && "must be a spelled token");
  FileID File;
  unsigned StartOffset;
  std::tie(File, StartOffset) = SM.getDecomposedLoc(location());
  return FileRange(File, StartOffset, StartOffset + length());
}

FileRange syntax::Token::range(const SourceManager &SM,
                               const syntax::Token &First,
                               const syntax::Token &Last) {
  auto F = First.range(SM);
  auto L = Last.range(SM);
  assert(F.file() == L.file() && "tokens from different files");
  assert((F == L || F.endOffset() <= L.beginOffset()) &&
         "wrong order of tokens");
  return FileRange(F.file(), F.beginOffset(), L.endOffset());
}

llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, const Token &T) {
  return OS << T.str();
}

FileRange::FileRange(FileID File, unsigned BeginOffset, unsigned EndOffset)
    : File(File), Begin(BeginOffset), End(EndOffset) {
  assert(File.isValid());
  assert(BeginOffset <= EndOffset);
}

FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
                     unsigned Length) {
  assert(BeginLoc.isValid());
  assert(BeginLoc.isFileID());

  std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
  End = Begin + Length;
}
FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
                     SourceLocation EndLoc) {
  assert(BeginLoc.isValid());
  assert(BeginLoc.isFileID());
  assert(EndLoc.isValid());
  assert(EndLoc.isFileID());
  assert(SM.getFileID(BeginLoc) == SM.getFileID(EndLoc));
  assert(SM.getFileOffset(BeginLoc) <= SM.getFileOffset(EndLoc));

  std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
  End = SM.getFileOffset(EndLoc);
}

llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS,
                                      const FileRange &R) {
  return OS << llvm::formatv("FileRange(file = {0}, offsets = {1}-{2})",
                             R.file().getHashValue(), R.beginOffset(),
                             R.endOffset());
}

llvm::StringRef FileRange::text(const SourceManager &SM) const {
  bool Invalid = false;
  StringRef Text = SM.getBufferData(File, &Invalid);
  if (Invalid)
    return "";
  assert(Begin <= Text.size());
  assert(End <= Text.size());
  return Text.substr(Begin, length());
}

llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const {
  return getTokensCovering(expandedTokens(), R, *SourceMgr);
}

CharSourceRange FileRange::toCharRange(const SourceManager &SM) const {
  return CharSourceRange(
      SourceRange(SM.getComposedLoc(File, Begin), SM.getComposedLoc(File, End)),
      /*IsTokenRange=*/false);
}

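// Example: given `#define FOO 1` and the source `int a = FOO;`, the expanded
// stream is `int a = 1 ;`. The expanded token `1` falls inside the mapping
// for the FOO expansion and maps back to the spelled token `FOO`; the
// expanded `;` lies after that mapping, and its spelled counterpart is found
// by offsetting from the mapping's end indices.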
std::pair<const syntax::Token *, const TokenBuffer::Mapping *>
TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
  assert(Expanded);
  assert(ExpandedTokens.data() <= Expanded &&
         Expanded < ExpandedTokens.data() + ExpandedTokens.size());

  auto FileIt = Files.find(
      SourceMgr->getFileID(SourceMgr->getExpansionLoc(Expanded->location())));
  assert(FileIt != Files.end() && "no file for an expanded token");

  const MarkedFile &File = FileIt->second;

  unsigned ExpandedIndex = Expanded - ExpandedTokens.data();
  // Find the first mapping that produced tokens after \p Expanded.
  auto It = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
    return M.BeginExpanded <= ExpandedIndex;
  });
  // Our token could only be produced by the previous mapping.
  if (It == File.Mappings.begin()) {
    // No previous mapping, no need to modify offsets.
    return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded],
            /*Mapping=*/nullptr};
  }
  --It; // 'It' now points to the last mapping that started before our token.

  // Check if the token is part of the mapping.
  if (ExpandedIndex < It->EndExpanded)
    return {&File.SpelledTokens[It->BeginSpelled], /*Mapping=*/&*It};

  // Not part of the mapping; use the indices from the previous mapping to
  // compute the corresponding spelled token.
  return {
      &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)],
      /*Mapping=*/nullptr};
}

const TokenBuffer::Mapping *
TokenBuffer::mappingStartingBeforeSpelled(const MarkedFile &F,
                                          const syntax::Token *Spelled) {
  assert(F.SpelledTokens.data() <= Spelled);
  unsigned SpelledI = Spelled - F.SpelledTokens.data();
  assert(SpelledI < F.SpelledTokens.size());

  auto It = llvm::partition_point(F.Mappings, [SpelledI](const Mapping &M) {
    return M.BeginSpelled <= SpelledI;
  });
  if (It == F.Mappings.begin())
    return nullptr;
  --It;
  return &*It;
}

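// Example: given `#define ID(X) X` and the source `ID(a) b`, the spelled
// tokens `ID ( a )` expand to `a`, and `b` expands to itself. Passing all of
// `ID ( a )` yields the expanded range {a}, while passing `(` alone yields an
// empty result, since it covers only part of a mapping (see the FIXME about
// macro arguments below).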
llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1>
TokenBuffer::expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
  if (Spelled.empty())
    return {};
  assert(Spelled.front().location().isFileID());

  auto FID = sourceManager().getFileID(Spelled.front().location());
  auto It = Files.find(FID);
  assert(It != Files.end());

  const MarkedFile &File = It->second;
  // `Spelled` must be a subrange of `File.SpelledTokens`.
  assert(File.SpelledTokens.data() <= Spelled.data());
  assert(&Spelled.back() <=
         File.SpelledTokens.data() + File.SpelledTokens.size());
#ifndef NDEBUG
  auto T1 = Spelled.back().location();
  auto T2 = File.SpelledTokens.back().location();
  assert(T1 == T2 || sourceManager().isBeforeInTranslationUnit(T1, T2));
#endif

  auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front());
  unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data();
  assert(SpelledFrontI < File.SpelledTokens.size());
  unsigned ExpandedBegin;
  if (!FrontMapping) {
    // No mapping starts before the first token of Spelled; no need to modify
    // offsets.
    ExpandedBegin = File.BeginExpanded + SpelledFrontI;
  } else if (SpelledFrontI < FrontMapping->EndSpelled) {
    // This mapping applies to Spelled tokens.
    if (SpelledFrontI != FrontMapping->BeginSpelled) {
      // Spelled tokens don't cover the entire mapping, return an empty result.
      return {}; // FIXME: support macro arguments.
    }
    // Spelled tokens start at the beginning of this mapping.
    ExpandedBegin = FrontMapping->BeginExpanded;
  } else {
    // Spelled tokens start after the mapping ends (they start in the hole
    // between 2 mappings, or between a mapping and the end of the file).
    ExpandedBegin =
        FrontMapping->EndExpanded + (SpelledFrontI - FrontMapping->EndSpelled);
  }

  auto *BackMapping = mappingStartingBeforeSpelled(File, &Spelled.back());
  unsigned SpelledBackI = &Spelled.back() - File.SpelledTokens.data();
  unsigned ExpandedEnd;
  if (!BackMapping) {
    // No mapping starts before the last token of Spelled; no need to modify
    // offsets.
    ExpandedEnd = File.BeginExpanded + SpelledBackI + 1;
  } else if (SpelledBackI < BackMapping->EndSpelled) {
    // This mapping applies to Spelled tokens.
    if (SpelledBackI + 1 != BackMapping->EndSpelled) {
      // Spelled tokens don't cover the entire mapping, return an empty result.
      return {}; // FIXME: support macro arguments.
    }
    ExpandedEnd = BackMapping->EndExpanded;
  } else {
    // Spelled tokens end after the mapping ends.
    ExpandedEnd =
        BackMapping->EndExpanded + (SpelledBackI - BackMapping->EndSpelled) + 1;
  }

  assert(ExpandedBegin < ExpandedTokens.size());
  assert(ExpandedEnd < ExpandedTokens.size());
  // Avoid returning empty ranges.
  if (ExpandedBegin == ExpandedEnd)
    return {};
  return {llvm::makeArrayRef(ExpandedTokens.data() + ExpandedBegin,
                             ExpandedTokens.data() + ExpandedEnd)};
}

llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const {
  auto It = Files.find(FID);
  assert(It != Files.end());
  return It->second.SpelledTokens;
}

const syntax::Token *TokenBuffer::spelledTokenAt(SourceLocation Loc) const {
  assert(Loc.isFileID());
  const auto *Tok = llvm::partition_point(
      spelledTokens(SourceMgr->getFileID(Loc)),
      [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
  if (!Tok || Tok->location() != Loc)
    return nullptr;
  return Tok;
}

std::string TokenBuffer::Mapping::str() const {
  return std::string(
      llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})",
                    BeginSpelled, EndSpelled, BeginExpanded, EndExpanded));
}

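// Example: given `#define PAIR 1, 2` and the source `f(PAIR)`, the expanded
// tokens `1 , 2` map back to the single spelled token `PAIR`, while the
// expanded token `1` alone maps to llvm::None, as it does not cover the full
// expansion. Macro arguments are mapped more precisely: with `#define ID(X) X`
// and `ID(1 + 2)`, the expanded `1 + 2` maps back to the spelled `1 + 2`
// inside the parentheses.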
llvm::Optional<llvm::ArrayRef<syntax::Token>>
TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
  // Mapping an empty range is ambiguous in case of empty mappings at either
  // end of the range; bail out in that case.
  if (Expanded.empty())
    return llvm::None;

  const syntax::Token *BeginSpelled;
  const Mapping *BeginMapping;
  std::tie(BeginSpelled, BeginMapping) =
      spelledForExpandedToken(&Expanded.front());

  const syntax::Token *LastSpelled;
  const Mapping *LastMapping;
  std::tie(LastSpelled, LastMapping) =
      spelledForExpandedToken(&Expanded.back());

  FileID FID = SourceMgr->getFileID(BeginSpelled->location());
  // FIXME: Handle multi-file changes by trying to map onto a common root.
  if (FID != SourceMgr->getFileID(LastSpelled->location()))
    return llvm::None;

  const MarkedFile &File = Files.find(FID)->second;

  // If both tokens are coming from a macro argument expansion, try to map to
  // the smallest part of the macro argument. The BeginMapping && LastMapping
  // check is only for performance; non-null mappings are a prerequisite for
  // Expanded.front() and Expanded.back() being part of a macro arg expansion.
  if (BeginMapping && LastMapping &&
      SourceMgr->isMacroArgExpansion(Expanded.front().location()) &&
      SourceMgr->isMacroArgExpansion(Expanded.back().location())) {
    auto CommonRange = findCommonRangeForMacroArgs(Expanded.front(),
                                                   Expanded.back(), *SourceMgr);
    // It might be the case that the tokens are arguments of different macro
    // calls; in that case we should continue with the logic below instead of
    // returning an empty range.
    if (CommonRange.isValid())
      return getTokensCovering(File.SpelledTokens, CommonRange, *SourceMgr);
  }

  // Do not allow changes that don't cover a full expansion.
  unsigned BeginExpanded = Expanded.begin() - ExpandedTokens.data();
  unsigned EndExpanded = Expanded.end() - ExpandedTokens.data();
  if (BeginMapping && BeginExpanded != BeginMapping->BeginExpanded)
    return llvm::None;
  if (LastMapping && LastMapping->EndExpanded != EndExpanded)
    return llvm::None;
  // All is good, return the result.
  return llvm::makeArrayRef(
      BeginMapping ? File.SpelledTokens.data() + BeginMapping->BeginSpelled
                   : BeginSpelled,
      LastMapping ? File.SpelledTokens.data() + LastMapping->EndSpelled
                  : LastSpelled + 1);
}

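// Example: given `#define FOO 1 2` and the source `FOO ;`, calling this on
// the spelled token `FOO` returns an Expansion with Spelled = {FOO} and
// Expanded = {1, 2}; calling it on `;` returns llvm::None, as no expansion
// starts there.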
llvm::Optional<TokenBuffer::Expansion>
TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const {
  assert(Spelled);
  assert(Spelled->location().isFileID() && "not a spelled token");
  auto FileIt = Files.find(SourceMgr->getFileID(Spelled->location()));
  assert(FileIt != Files.end() && "file not tracked by token buffer");

  auto &File = FileIt->second;
  assert(File.SpelledTokens.data() <= Spelled &&
         Spelled < (File.SpelledTokens.data() + File.SpelledTokens.size()));

  unsigned SpelledIndex = Spelled - File.SpelledTokens.data();
  auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
    return M.BeginSpelled < SpelledIndex;
  });
  if (M == File.Mappings.end() || M->BeginSpelled != SpelledIndex)
    return llvm::None;

  Expansion E;
  E.Spelled = llvm::makeArrayRef(File.SpelledTokens.data() + M->BeginSpelled,
                                 File.SpelledTokens.data() + M->EndSpelled);
  E.Expanded = llvm::makeArrayRef(ExpandedTokens.data() + M->BeginExpanded,
                                  ExpandedTokens.data() + M->EndExpanded);
  return E;
}
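// Example: for the spelled tokens `a+b`, a location between `a` and `+`
// touches both, so the result is {a, +}; a location inside a token yields
// just that token; a location in whitespace, touching neither neighbor,
// yields an empty range.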
llvm::ArrayRef<syntax::Token>
syntax::spelledTokensTouching(SourceLocation Loc,
                              llvm::ArrayRef<syntax::Token> Tokens) {
  assert(Loc.isFileID());

  auto *Right = llvm::partition_point(
      Tokens, [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
  bool AcceptRight = Right != Tokens.end() && Right->location() <= Loc;
  bool AcceptLeft =
      Right != Tokens.begin() && (Right - 1)->endLocation() >= Loc;
  return llvm::makeArrayRef(Right - (AcceptLeft ? 1 : 0),
                            Right + (AcceptRight ? 1 : 0));
}

llvm::ArrayRef<syntax::Token>
syntax::spelledTokensTouching(SourceLocation Loc,
                              const syntax::TokenBuffer &Tokens) {
  return spelledTokensTouching(
      Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
}

const syntax::Token *
syntax::spelledIdentifierTouching(SourceLocation Loc,
                                  llvm::ArrayRef<syntax::Token> Tokens) {
  for (const syntax::Token &Tok : spelledTokensTouching(Loc, Tokens)) {
    if (Tok.kind() == tok::identifier)
      return &Tok;
  }
  return nullptr;
}

const syntax::Token *
syntax::spelledIdentifierTouching(SourceLocation Loc,
                                  const syntax::TokenBuffer &Tokens) {
  return spelledIdentifierTouching(
      Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
}

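// Example: given `#define FOO 1` and a file containing `FOO FOO`, this
// returns pointers to the two spelled `FOO` tokens at the use sites; the
// `#define` directive itself does not contribute, as its mapping begins with
// `#` rather than an identifier.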
std::vector<const syntax::Token *>
TokenBuffer::macroExpansions(FileID FID) const {
  auto FileIt = Files.find(FID);
  assert(FileIt != Files.end() && "file not tracked by token buffer");
  auto &File = FileIt->second;
  std::vector<const syntax::Token *> Expansions;
  auto &Spelled = File.SpelledTokens;
  for (auto Mapping : File.Mappings) {
    const syntax::Token *Token = &Spelled[Mapping.BeginSpelled];
    if (Token->kind() == tok::TokenKind::identifier)
      Expansions.push_back(Token);
  }
  return Expansions;
}

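// The tokens are lexed in raw mode, i.e. without running the preprocessor:
// macro uses stay unexpanded and directives appear as ordinary tokens. A
// minimal usage sketch, assuming SM and LangOpts describe an already-loaded
// file:
//   FileID FID = SM.getMainFileID();
//   std::vector<syntax::Token> Toks = syntax::tokenize(FID, SM, LangOpts);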
std::vector<syntax::Token> syntax::tokenize(const FileRange &FR,
                                            const SourceManager &SM,
                                            const LangOptions &LO) {
  std::vector<syntax::Token> Tokens;
  IdentifierTable Identifiers(LO);
  auto AddToken = [&](clang::Token T) {
    // Fill the proper token kind for keywords, etc.
    if (T.getKind() == tok::raw_identifier && !T.needsCleaning() &&
        !T.hasUCN()) { // FIXME: support needsCleaning and hasUCN cases.
      clang::IdentifierInfo &II = Identifiers.get(T.getRawIdentifier());
      T.setIdentifierInfo(&II);
      T.setKind(II.getTokenID());
    }
    Tokens.push_back(syntax::Token(T));
  };

  auto SrcBuffer = SM.getBufferData(FR.file());
  Lexer L(SM.getLocForStartOfFile(FR.file()), LO, SrcBuffer.data(),
          SrcBuffer.data() + FR.beginOffset(),
          // We can't make BufEnd point to FR.endOffset, as Lexer requires a
          // null-terminated buffer.
          SrcBuffer.data() + SrcBuffer.size());

  clang::Token T;
  while (!L.LexFromRawLexer(T) && L.getCurrentBufferOffset() < FR.endOffset())
    AddToken(T);
  // LexFromRawLexer returns true when it parses the last token of the file;
  // add it iff it starts within the range we are interested in.
  if (SM.getFileOffset(T.getLocation()) < FR.endOffset())
    AddToken(T);
  return Tokens;
}

std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM,
                                            const LangOptions &LO) {
  return tokenize(syntax::FileRange(FID, 0, SM.getFileIDSize(FID)), SM, LO);
}

/// Records information required to construct mappings for the token buffer
/// that we are collecting.
class TokenCollector::CollectPPExpansions : public PPCallbacks {
public:
  CollectPPExpansions(TokenCollector &C) : Collector(&C) {}

  /// A disabled instance will stop reporting anything to TokenCollector.
  /// This ensures that uses of the preprocessor after
  /// TokenCollector::consume() is called do not access the (possibly invalid)
  /// collector instance.
  void disable() { Collector = nullptr; }

  void MacroExpands(const clang::Token &MacroNameTok, const MacroDefinition &MD,
                    SourceRange Range, const MacroArgs *Args) override {
    if (!Collector)
      return;
    const auto &SM = Collector->PP.getSourceManager();
    // Only record top-level expansions that directly produce expanded tokens.
    // This excludes those where:
    //   - the macro use is inside a macro body,
    //   - the macro appears in an argument to another macro.
    // However, macro expansion isn't really a tree, it's token rewrite rules,
    // so there are other cases, e.g.
    //   #define B(X) X
    //   #define A 1 + B
    //   A(2)
    // Both A and B produce expanded tokens, though the macro name 'B' comes
    // from an expansion. The best we can do is merge the mappings for both.

    // The *last* token of any top-level macro expansion must be in a file.
    // (In the example above, see the closing paren of the expansion of B.)
    if (!Range.getEnd().isFileID())
      return;
    // If there's a current expansion that encloses this one, this one can't be
    // top-level.
    if (LastExpansionEnd.isValid() &&
        !SM.isBeforeInTranslationUnit(LastExpansionEnd, Range.getEnd()))
      return;

    // If the macro invocation (B) starts in a macro (A) but ends in a file,
    // we'll create a merged mapping for A + B by overwriting the endpoint for
    // A's startpoint.
    if (!Range.getBegin().isFileID()) {
      Range.setBegin(SM.getExpansionLoc(Range.getBegin()));
      assert(Collector->Expansions.count(Range.getBegin().getRawEncoding()) &&
             "Overlapping macros should have same expansion location");
    }

    Collector->Expansions[Range.getBegin().getRawEncoding()] = Range.getEnd();
    LastExpansionEnd = Range.getEnd();
  }
  // FIXME: handle directives like #pragma, #include, etc.
private:
  TokenCollector *Collector;
  /// Used to detect recursive macro expansions.
  SourceLocation LastExpansionEnd;
};

/// Fills in the TokenBuffer by tracing the run of a preprocessor. The
/// implementation tracks the tokens, macro expansions and directives coming
/// from the preprocessor and:
/// - for each token, figures out if it is a part of an expanded token stream,
///   a spelled token stream, or both. Stores the tokens appropriately.
/// - records mappings from the spelled to expanded token ranges, e.g. for
///   macro expansions.
/// FIXME: also properly record:
///   - #include directives,
///   - #pragma, #line and other PP directives,
///   - skipped pp regions,
///   - ...

TokenCollector::TokenCollector(Preprocessor &PP) : PP(PP) {
  // Collect the expanded token stream during preprocessing.
  PP.setTokenWatcher([this](const clang::Token &T) {
    if (T.isAnnotation())
      return;
    DEBUG_WITH_TYPE("collect-tokens",
                    llvm::dbgs() << "Token: "
                                 << syntax::Token(T).dumpForTests(
                                        this->PP.getSourceManager())
                                 << "\n");
    Expanded.push_back(syntax::Token(T));
  });
  // And locations of macro calls, to properly recover boundaries of those in
  // case of empty expansions.
  auto CB = std::make_unique<CollectPPExpansions>(*this);
  this->Collector = CB.get();
  PP.addPPCallbacks(std::move(CB));
}

/// Builds mappings and spelled tokens in the TokenBuffer based on the expanded
/// token stream.
class TokenCollector::Builder {
public:
  Builder(std::vector<syntax::Token> Expanded, PPExpansions CollectedExpansions,
          const SourceManager &SM, const LangOptions &LangOpts)
      : Result(SM), CollectedExpansions(std::move(CollectedExpansions)), SM(SM),
        LangOpts(LangOpts) {
    Result.ExpandedTokens = std::move(Expanded);
  }

  TokenBuffer build() && {
    assert(!Result.ExpandedTokens.empty());
    assert(Result.ExpandedTokens.back().kind() == tok::eof);

    // Tokenize every file that contributed tokens to the expanded stream.
    buildSpelledTokens();

    // The expanded token stream consists of runs of tokens that came from
    // the same source (a macro expansion, part of a file, etc.).
    // Between these runs are the logical positions of spelled tokens that
    // didn't expand to anything.
    while (NextExpanded < Result.ExpandedTokens.size() - 1 /* eof */) {
      // Create empty mappings for spelled tokens that expanded to nothing
      // here. May advance NextSpelled, but NextExpanded is unchanged.
      discard();
      // Create a mapping for a contiguous run of expanded tokens.
      // Advances NextExpanded past the run, and NextSpelled accordingly.
      unsigned OldPosition = NextExpanded;
      advance();
      if (NextExpanded == OldPosition)
        diagnoseAdvanceFailure();
    }
    // If any tokens remain in any of the files, they didn't expand to
    // anything. Create empty mappings up until the end of the file.
    for (const auto &File : Result.Files)
      discard(File.first);

#ifndef NDEBUG
    for (auto &pair : Result.Files) {
      auto &mappings = pair.second.Mappings;
      assert(llvm::is_sorted(mappings, [](const TokenBuffer::Mapping &M1,
                                          const TokenBuffer::Mapping &M2) {
        return M1.BeginSpelled < M2.BeginSpelled &&
               M1.EndSpelled < M2.EndSpelled &&
               M1.BeginExpanded < M2.BeginExpanded &&
               M1.EndExpanded < M2.EndExpanded;
      }));
    }
#endif

    return std::move(Result);
  }

private:
  // Consume a sequence of spelled tokens that didn't expand to anything.
  // In the simplest case, skips spelled tokens until finding one that produced
  // the NextExpanded token, and creates an empty mapping for them.
  // If Drain is provided, skips remaining tokens from that file instead.
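  // Example: given `#define EMPTY` and the source `EMPTY int a ;`, the
  // spelled token `EMPTY` produces no expanded tokens; before the run
  // `int a ;` is consumed, discard() emits an empty mapping whose spelled
  // range covers `EMPTY` and whose expanded range is empty.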
  void discard(llvm::Optional<FileID> Drain = llvm::None) {
    SourceLocation Target =
        Drain ? SM.getLocForEndOfFile(*Drain)
              : SM.getExpansionLoc(
                    Result.ExpandedTokens[NextExpanded].location());
    FileID File = SM.getFileID(Target);
    const auto &SpelledTokens = Result.Files[File].SpelledTokens;
    auto &NextSpelled = this->NextSpelled[File];

    TokenBuffer::Mapping Mapping;
    Mapping.BeginSpelled = NextSpelled;
    // When dropping trailing tokens from a file, the empty mapping should
    // be positioned within the file's expanded-token range (at the end).
    Mapping.BeginExpanded = Mapping.EndExpanded =
        Drain ? Result.Files[*Drain].EndExpanded : NextExpanded;
    // We may want to split into several adjacent empty mappings.
    // FlushMapping() emits the current mapping and starts a new one.
    auto FlushMapping = [&, this] {
      Mapping.EndSpelled = NextSpelled;
      if (Mapping.BeginSpelled != Mapping.EndSpelled)
        Result.Files[File].Mappings.push_back(Mapping);
      Mapping.BeginSpelled = NextSpelled;
    };

    while (NextSpelled < SpelledTokens.size() &&
           SpelledTokens[NextSpelled].location() < Target) {
      // If we know mapping bounds at [NextSpelled, KnownEnd] (macro expansion)
      // then we want to partition our (empty) mapping.
      // [Start, NextSpelled) [NextSpelled, KnownEnd] (KnownEnd, Target)
      SourceLocation KnownEnd = CollectedExpansions.lookup(
          SpelledTokens[NextSpelled].location().getRawEncoding());
      if (KnownEnd.isValid()) {
        FlushMapping(); // Emits [Start, NextSpelled)
        while (NextSpelled < SpelledTokens.size() &&
               SpelledTokens[NextSpelled].location() <= KnownEnd)
          ++NextSpelled;
        FlushMapping(); // Emits [NextSpelled, KnownEnd]
        // Now the loop continues and will emit (KnownEnd, Target).
      } else {
        ++NextSpelled;
      }
    }
    FlushMapping();
  }

  // Consumes the NextExpanded token and others that are part of the same run.
  // Increases NextExpanded and NextSpelled by at least one, and adds a mapping
  // (unless this is a run of file tokens, which we represent with no mapping).
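  // Example: for `#define ID(X) X` and the source `int ID(a) ;`, advance()
  // first consumes the file-token run `int` (needing no mapping), then the
  // expansion, where spelled `ID ( a )` and expanded `a` become one mapping,
  // and finally the file-token run `;`.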
  void advance() {
    const syntax::Token &Tok = Result.ExpandedTokens[NextExpanded];
    SourceLocation Expansion = SM.getExpansionLoc(Tok.location());
    FileID File = SM.getFileID(Expansion);
    const auto &SpelledTokens = Result.Files[File].SpelledTokens;
    auto &NextSpelled = this->NextSpelled[File];

    if (Tok.location().isFileID()) {
      // A run of file tokens continues while the expanded/spelled tokens
      // match.
      while (NextSpelled < SpelledTokens.size() &&
             NextExpanded < Result.ExpandedTokens.size() &&
             SpelledTokens[NextSpelled].location() ==
                 Result.ExpandedTokens[NextExpanded].location()) {
        ++NextSpelled;
        ++NextExpanded;
      }
      // We need no mapping for file tokens copied to the expanded stream.
    } else {
      // We found a new macro expansion. We should have its spelling bounds.
      auto End = CollectedExpansions.lookup(Expansion.getRawEncoding());
      assert(End.isValid() && "Macro expansion wasn't captured?");

      // Mapping starts here...
      TokenBuffer::Mapping Mapping;
      Mapping.BeginExpanded = NextExpanded;
      Mapping.BeginSpelled = NextSpelled;
      // ... consumes spelled tokens within bounds we captured ...
      while (NextSpelled < SpelledTokens.size() &&
             SpelledTokens[NextSpelled].location() <= End)
        ++NextSpelled;
      // ... consumes expanded tokens rooted at the same expansion ...
      while (NextExpanded < Result.ExpandedTokens.size() &&
             SM.getExpansionLoc(
                 Result.ExpandedTokens[NextExpanded].location()) == Expansion)
        ++NextExpanded;
      // ... and ends here.
      Mapping.EndExpanded = NextExpanded;
      Mapping.EndSpelled = NextSpelled;
      Result.Files[File].Mappings.push_back(Mapping);
    }
  }

  // advance() is supposed to consume at least one token; if not, we crash.
  void diagnoseAdvanceFailure() {
#ifndef NDEBUG
    // Show the failed-to-map token in context.
    for (unsigned I = (NextExpanded < 10) ? 0 : NextExpanded - 10;
         I < NextExpanded + 5 && I < Result.ExpandedTokens.size(); ++I) {
      const char *L =
          (I == NextExpanded) ? "!! " : (I < NextExpanded) ? "ok " : "   ";
      llvm::errs() << L << Result.ExpandedTokens[I].dumpForTests(SM) << "\n";
    }
#endif
    llvm_unreachable("Couldn't map expanded token to spelled tokens!");
  }

  /// Initializes TokenBuffer::Files and fills spelled tokens and expanded
  /// ranges for each of the files.
  void buildSpelledTokens() {
    for (unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) {
      const auto &Tok = Result.ExpandedTokens[I];
      auto FID = SM.getFileID(SM.getExpansionLoc(Tok.location()));
      auto It = Result.Files.try_emplace(FID);
      TokenBuffer::MarkedFile &File = It.first->second;

      // The eof token should not be considered part of the main-file's range.
      File.EndExpanded = Tok.kind() == tok::eof ? I : I + 1;

      if (!It.second)
        continue; // we have seen this file before.
      // This is the first time we see this file.
      File.BeginExpanded = I;
      File.SpelledTokens = tokenize(FID, SM, LangOpts);
    }
  }

  TokenBuffer Result;
  unsigned NextExpanded = 0;                    // cursor in ExpandedTokens
  llvm::DenseMap<FileID, unsigned> NextSpelled; // cursor in SpelledTokens
  PPExpansions CollectedExpansions;
  const SourceManager &SM;
  const LangOptions &LangOpts;
};

TokenBuffer TokenCollector::consume() && {
  PP.setTokenWatcher(nullptr);
  Collector->disable();
  return Builder(std::move(Expanded), std::move(Expansions),
                 PP.getSourceManager(), PP.getLangOpts())
      .build();
}

std::string syntax::Token::str() const {
  return std::string(llvm::formatv("Token({0}, length = {1})",
                                   tok::getTokenName(kind()), length()));
}

std::string syntax::Token::dumpForTests(const SourceManager &SM) const {
  return std::string(llvm::formatv("Token(`{0}`, {1}, length = {2})", text(SM),
                                   tok::getTokenName(kind()), length()));
}

std::string TokenBuffer::dumpForTests() const {
  auto PrintToken = [this](const syntax::Token &T) -> std::string {
    if (T.kind() == tok::eof)
      return "<eof>";
    return std::string(T.text(*SourceMgr));
  };

  auto DumpTokens = [this, &PrintToken](llvm::raw_ostream &OS,
                                        llvm::ArrayRef<syntax::Token> Tokens) {
    if (Tokens.empty()) {
      OS << "<empty>";
      return;
    }
    OS << Tokens[0].text(*SourceMgr);
    for (unsigned I = 1; I < Tokens.size(); ++I) {
      if (Tokens[I].kind() == tok::eof)
        continue;
      OS << " " << PrintToken(Tokens[I]);
    }
  };

  std::string Dump;
  llvm::raw_string_ostream OS(Dump);

  OS << "expanded tokens:\n"
     << "  ";
  // (!) we do not show '<eof>'.
  DumpTokens(OS, llvm::makeArrayRef(ExpandedTokens).drop_back());
  OS << "\n";

  std::vector<FileID> Keys;
  for (auto F : Files)
    Keys.push_back(F.first);
  llvm::sort(Keys);

  for (FileID ID : Keys) {
    const MarkedFile &File = Files.find(ID)->second;
    auto *Entry = SourceMgr->getFileEntryForID(ID);
    if (!Entry)
      continue; // Skip builtin files.
    OS << llvm::formatv("file '{0}'\n", Entry->getName())
       << "  spelled tokens:\n"
       << "    ";
    DumpTokens(OS, File.SpelledTokens);
    OS << "\n";

    if (File.Mappings.empty()) {
      OS << "  no mappings.\n";
      continue;
    }
    OS << "  mappings:\n";
    for (auto &M : File.Mappings) {
      OS << llvm::formatv(
          "    ['{0}'_{1}, '{2}'_{3}) => ['{4}'_{5}, '{6}'_{7})\n",
          PrintToken(File.SpelledTokens[M.BeginSpelled]), M.BeginSpelled,
          M.EndSpelled == File.SpelledTokens.size()
              ? "<eof>"
              : PrintToken(File.SpelledTokens[M.EndSpelled]),
          M.EndSpelled, PrintToken(ExpandedTokens[M.BeginExpanded]),
          M.BeginExpanded, PrintToken(ExpandedTokens[M.EndExpanded]),
          M.EndExpanded);
    }
  }
  return OS.str();
}