Mercurial > hg > CbC > CbC_llvm
comparison clang/lib/Lex/Preprocessor.cpp @ 150:1d019706d866
LLVM10
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 15:10:13 +0900 |
parents | |
children | e8a9b4f4d755 0572611fdcc8 |
comparison
equal
deleted
inserted
replaced
147:c2174574ed3a | 150:1d019706d866 |
---|---|
1 //===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===// | |
2 // | |
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |
4 // See https://llvm.org/LICENSE.txt for license information. | |
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |
6 // | |
7 //===----------------------------------------------------------------------===// | |
8 // | |
9 // This file implements the Preprocessor interface. | |
10 // | |
11 //===----------------------------------------------------------------------===// | |
12 // | |
13 // Options to support: | |
14 // -H - Print the name of each header file used. | |
15 // -d[DNI] - Dump various things. | |
16 // -fworking-directory - #line's with preprocessor's working dir. | |
17 // -fpreprocessed | |
18 // -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD | |
19 // -W* | |
20 // -w | |
21 // | |
22 // Messages to emit: | |
23 // "Multiple include guards may be useful for:\n" | |
24 // | |
25 //===----------------------------------------------------------------------===// | |
26 | |
27 #include "clang/Lex/Preprocessor.h" | |
28 #include "clang/Basic/Builtins.h" | |
29 #include "clang/Basic/FileManager.h" | |
30 #include "clang/Basic/FileSystemStatCache.h" | |
31 #include "clang/Basic/IdentifierTable.h" | |
32 #include "clang/Basic/LLVM.h" | |
33 #include "clang/Basic/LangOptions.h" | |
34 #include "clang/Basic/Module.h" | |
35 #include "clang/Basic/SourceLocation.h" | |
36 #include "clang/Basic/SourceManager.h" | |
37 #include "clang/Basic/TargetInfo.h" | |
38 #include "clang/Lex/CodeCompletionHandler.h" | |
39 #include "clang/Lex/ExternalPreprocessorSource.h" | |
40 #include "clang/Lex/HeaderSearch.h" | |
41 #include "clang/Lex/LexDiagnostic.h" | |
42 #include "clang/Lex/Lexer.h" | |
43 #include "clang/Lex/LiteralSupport.h" | |
44 #include "clang/Lex/MacroArgs.h" | |
45 #include "clang/Lex/MacroInfo.h" | |
46 #include "clang/Lex/ModuleLoader.h" | |
47 #include "clang/Lex/Pragma.h" | |
48 #include "clang/Lex/PreprocessingRecord.h" | |
49 #include "clang/Lex/PreprocessorLexer.h" | |
50 #include "clang/Lex/PreprocessorOptions.h" | |
51 #include "clang/Lex/ScratchBuffer.h" | |
52 #include "clang/Lex/Token.h" | |
53 #include "clang/Lex/TokenLexer.h" | |
54 #include "llvm/ADT/APInt.h" | |
55 #include "llvm/ADT/ArrayRef.h" | |
56 #include "llvm/ADT/DenseMap.h" | |
57 #include "llvm/ADT/STLExtras.h" | |
58 #include "llvm/ADT/SmallString.h" | |
59 #include "llvm/ADT/SmallVector.h" | |
60 #include "llvm/ADT/StringRef.h" | |
61 #include "llvm/ADT/StringSwitch.h" | |
62 #include "llvm/Support/Capacity.h" | |
63 #include "llvm/Support/ErrorHandling.h" | |
64 #include "llvm/Support/MemoryBuffer.h" | |
65 #include "llvm/Support/raw_ostream.h" | |
66 #include <algorithm> | |
67 #include <cassert> | |
68 #include <memory> | |
69 #include <string> | |
70 #include <utility> | |
71 #include <vector> | |
72 | |
73 using namespace clang; | |
74 | |
75 LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry) | |
76 | |
77 ExternalPreprocessorSource::~ExternalPreprocessorSource() = default; | |
78 | |
79 Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts, | |
80 DiagnosticsEngine &diags, LangOptions &opts, | |
81 SourceManager &SM, HeaderSearch &Headers, | |
82 ModuleLoader &TheModuleLoader, | |
83 IdentifierInfoLookup *IILookup, bool OwnsHeaders, | |
84 TranslationUnitKind TUKind) | |
85 : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), | |
86 FileMgr(Headers.getFileMgr()), SourceMgr(SM), | |
87 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers), | |
88 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr), | |
89 // As the language options may have not been loaded yet (when | |
90 // deserializing an ASTUnit), adding keywords to the identifier table is | |
91 // deferred to Preprocessor::Initialize(). | |
92 Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())), | |
93 TUKind(TUKind), SkipMainFilePreamble(0, true), | |
94 CurSubmoduleState(&NullSubmoduleState) { | |
95 OwnsHeaderSearch = OwnsHeaders; | |
96 | |
97 // Default to discarding comments. | |
98 KeepComments = false; | |
99 KeepMacroComments = false; | |
100 SuppressIncludeNotFoundError = false; | |
101 | |
102 // Macro expansion is enabled. | |
103 DisableMacroExpansion = false; | |
104 MacroExpansionInDirectivesOverride = false; | |
105 InMacroArgs = false; | |
106 ArgMacro = nullptr; | |
107 InMacroArgPreExpansion = false; | |
108 NumCachedTokenLexers = 0; | |
109 PragmasEnabled = true; | |
110 ParsingIfOrElifDirective = false; | |
111 PreprocessedOutput = false; | |
112 | |
113 // We haven't read anything from the external source. | |
114 ReadMacrosFromExternalSource = false; | |
115 | |
116 BuiltinInfo = std::make_unique<Builtin::Context>(); | |
117 | |
118 // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of | |
119 // a macro. They get unpoisoned where it is allowed. | |
120 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); | |
121 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); | |
122 if (getLangOpts().CPlusPlus2a) { | |
123 (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned(); | |
124 SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use); | |
125 } else { | |
126 Ident__VA_OPT__ = nullptr; | |
127 } | |
128 | |
129 // Initialize the pragma handlers. | |
130 RegisterBuiltinPragmas(); | |
131 | |
132 // Initialize builtin macros like __LINE__ and friends. | |
133 RegisterBuiltinMacros(); | |
134 | |
135 if(LangOpts.Borland) { | |
136 Ident__exception_info = getIdentifierInfo("_exception_info"); | |
137 Ident___exception_info = getIdentifierInfo("__exception_info"); | |
138 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation"); | |
139 Ident__exception_code = getIdentifierInfo("_exception_code"); | |
140 Ident___exception_code = getIdentifierInfo("__exception_code"); | |
141 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode"); | |
142 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination"); | |
143 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination"); | |
144 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination"); | |
145 } else { | |
146 Ident__exception_info = Ident__exception_code = nullptr; | |
147 Ident__abnormal_termination = Ident___exception_info = nullptr; | |
148 Ident___exception_code = Ident___abnormal_termination = nullptr; | |
149 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr; | |
150 Ident_AbnormalTermination = nullptr; | |
151 } | |
152 | |
153 // If using a PCH where a #pragma hdrstop is expected, start skipping tokens. | |
154 if (usingPCHWithPragmaHdrStop()) | |
155 SkippingUntilPragmaHdrStop = true; | |
156 | |
157 // If using a PCH with a through header, start skipping tokens. | |
158 if (!this->PPOpts->PCHThroughHeader.empty() && | |
159 !this->PPOpts->ImplicitPCHInclude.empty()) | |
160 SkippingUntilPCHThroughHeader = true; | |
161 | |
162 if (this->PPOpts->GeneratePreamble) | |
163 PreambleConditionalStack.startRecording(); | |
164 | |
165 ExcludedConditionalDirectiveSkipMappings = | |
166 this->PPOpts->ExcludedConditionalDirectiveSkipMappings; | |
167 if (ExcludedConditionalDirectiveSkipMappings) | |
168 ExcludedConditionalDirectiveSkipMappings->clear(); | |
169 | |
170 MaxTokens = LangOpts.MaxTokens; | |
171 } | |
172 | |
173 Preprocessor::~Preprocessor() { | |
174 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); | |
175 | |
176 IncludeMacroStack.clear(); | |
177 | |
178 // Destroy any macro definitions. | |
179 while (MacroInfoChain *I = MIChainHead) { | |
180 MIChainHead = I->Next; | |
181 I->~MacroInfoChain(); | |
182 } | |
183 | |
184 // Free any cached macro expanders. | |
185 // This populates MacroArgCache, so all TokenLexers need to be destroyed | |
186 // before the code below that frees up the MacroArgCache list. | |
187 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr); | |
188 CurTokenLexer.reset(); | |
189 | |
190 // Free any cached MacroArgs. | |
191 for (MacroArgs *ArgList = MacroArgCache; ArgList;) | |
192 ArgList = ArgList->deallocate(); | |
193 | |
194 // Delete the header search info, if we own it. | |
195 if (OwnsHeaderSearch) | |
196 delete &HeaderInfo; | |
197 } | |
198 | |
199 void Preprocessor::Initialize(const TargetInfo &Target, | |
200 const TargetInfo *AuxTarget) { | |
201 assert((!this->Target || this->Target == &Target) && | |
202 "Invalid override of target information"); | |
203 this->Target = &Target; | |
204 | |
205 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) && | |
206 "Invalid override of aux target information."); | |
207 this->AuxTarget = AuxTarget; | |
208 | |
209 // Initialize information about built-ins. | |
210 BuiltinInfo->InitializeTarget(Target, AuxTarget); | |
211 HeaderInfo.setTarget(Target); | |
212 | |
213 // Populate the identifier table with info about keywords for the current language. | |
214 Identifiers.AddKeywords(LangOpts); | |
215 } | |
216 | |
217 void Preprocessor::InitializeForModelFile() { | |
218 NumEnteredSourceFiles = 0; | |
219 | |
220 // Reset pragmas | |
221 PragmaHandlersBackup = std::move(PragmaHandlers); | |
222 PragmaHandlers = std::make_unique<PragmaNamespace>(StringRef()); | |
223 RegisterBuiltinPragmas(); | |
224 | |
225 // Reset PredefinesFileID | |
226 PredefinesFileID = FileID(); | |
227 } | |
228 | |
229 void Preprocessor::FinalizeForModelFile() { | |
230 NumEnteredSourceFiles = 1; | |
231 | |
232 PragmaHandlers = std::move(PragmaHandlersBackup); | |
233 } | |
234 | |
235 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { | |
236 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '" | |
237 << getSpelling(Tok) << "'"; | |
238 | |
239 if (!DumpFlags) return; | |
240 | |
241 llvm::errs() << "\t"; | |
242 if (Tok.isAtStartOfLine()) | |
243 llvm::errs() << " [StartOfLine]"; | |
244 if (Tok.hasLeadingSpace()) | |
245 llvm::errs() << " [LeadingSpace]"; | |
246 if (Tok.isExpandDisabled()) | |
247 llvm::errs() << " [ExpandDisabled]"; | |
248 if (Tok.needsCleaning()) { | |
249 const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); | |
250 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) | |
251 << "']"; | |
252 } | |
253 | |
254 llvm::errs() << "\tLoc=<"; | |
255 DumpLocation(Tok.getLocation()); | |
256 llvm::errs() << ">"; | |
257 } | |
258 | |
259 void Preprocessor::DumpLocation(SourceLocation Loc) const { | |
260 Loc.print(llvm::errs(), SourceMgr); | |
261 } | |
262 | |
263 void Preprocessor::DumpMacro(const MacroInfo &MI) const { | |
264 llvm::errs() << "MACRO: "; | |
265 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { | |
266 DumpToken(MI.getReplacementToken(i)); | |
267 llvm::errs() << " "; | |
268 } | |
269 llvm::errs() << "\n"; | |
270 } | |
271 | |
272 void Preprocessor::PrintStats() { | |
273 llvm::errs() << "\n*** Preprocessor Stats:\n"; | |
274 llvm::errs() << NumDirectives << " directives found:\n"; | |
275 llvm::errs() << " " << NumDefined << " #define.\n"; | |
276 llvm::errs() << " " << NumUndefined << " #undef.\n"; | |
277 llvm::errs() << " #include/#include_next/#import:\n"; | |
278 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n"; | |
279 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n"; | |
280 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n"; | |
281 llvm::errs() << " " << NumElse << " #else/#elif.\n"; | |
282 llvm::errs() << " " << NumEndif << " #endif.\n"; | |
283 llvm::errs() << " " << NumPragma << " #pragma.\n"; | |
284 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; | |
285 | |
286 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" | |
287 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " | |
288 << NumFastMacroExpanded << " on the fast path.\n"; | |
289 llvm::errs() << (NumFastTokenPaste+NumTokenPaste) | |
290 << " token paste (##) operations performed, " | |
291 << NumFastTokenPaste << " on the fast path.\n"; | |
292 | |
293 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total"; | |
294 | |
295 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory(); | |
296 llvm::errs() << "\n Macro Expanded Tokens: " | |
297 << llvm::capacity_in_bytes(MacroExpandedTokens); | |
298 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity(); | |
299 // FIXME: List information for all submodules. | |
300 llvm::errs() << "\n Macros: " | |
301 << llvm::capacity_in_bytes(CurSubmoduleState->Macros); | |
302 llvm::errs() << "\n #pragma push_macro Info: " | |
303 << llvm::capacity_in_bytes(PragmaPushMacroInfo); | |
304 llvm::errs() << "\n Poison Reasons: " | |
305 << llvm::capacity_in_bytes(PoisonReasons); | |
306 llvm::errs() << "\n Comment Handlers: " | |
307 << llvm::capacity_in_bytes(CommentHandlers) << "\n"; | |
308 } | |
309 | |
310 Preprocessor::macro_iterator | |
311 Preprocessor::macro_begin(bool IncludeExternalMacros) const { | |
312 if (IncludeExternalMacros && ExternalSource && | |
313 !ReadMacrosFromExternalSource) { | |
314 ReadMacrosFromExternalSource = true; | |
315 ExternalSource->ReadDefinedMacros(); | |
316 } | |
317 | |
318 // Make sure we cover all macros in visible modules. | |
319 for (const ModuleMacro &Macro : ModuleMacros) | |
320 CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState())); | |
321 | |
322 return CurSubmoduleState->Macros.begin(); | |
323 } | |
324 | |
325 size_t Preprocessor::getTotalMemory() const { | |
326 return BP.getTotalMemory() | |
327 + llvm::capacity_in_bytes(MacroExpandedTokens) | |
328 + Predefines.capacity() /* Predefines buffer. */ | |
329 // FIXME: Include sizes from all submodules, and include MacroInfo sizes, | |
330 // and ModuleMacros. | |
331 + llvm::capacity_in_bytes(CurSubmoduleState->Macros) | |
332 + llvm::capacity_in_bytes(PragmaPushMacroInfo) | |
333 + llvm::capacity_in_bytes(PoisonReasons) | |
334 + llvm::capacity_in_bytes(CommentHandlers); | |
335 } | |
336 | |
337 Preprocessor::macro_iterator | |
338 Preprocessor::macro_end(bool IncludeExternalMacros) const { | |
339 if (IncludeExternalMacros && ExternalSource && | |
340 !ReadMacrosFromExternalSource) { | |
341 ReadMacrosFromExternalSource = true; | |
342 ExternalSource->ReadDefinedMacros(); | |
343 } | |
344 | |
345 return CurSubmoduleState->Macros.end(); | |
346 } | |
347 | |
348 /// Compares macro tokens with a specified token value sequence. | |
349 static bool MacroDefinitionEquals(const MacroInfo *MI, | |
350 ArrayRef<TokenValue> Tokens) { | |
351 return Tokens.size() == MI->getNumTokens() && | |
352 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin()); | |
353 } | |
354 | |
355 StringRef Preprocessor::getLastMacroWithSpelling( | |
356 SourceLocation Loc, | |
357 ArrayRef<TokenValue> Tokens) const { | |
358 SourceLocation BestLocation; | |
359 StringRef BestSpelling; | |
360 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end(); | |
361 I != E; ++I) { | |
362 const MacroDirective::DefInfo | |
363 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr); | |
364 if (!Def || !Def.getMacroInfo()) | |
365 continue; | |
366 if (!Def.getMacroInfo()->isObjectLike()) | |
367 continue; | |
368 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens)) | |
369 continue; | |
370 SourceLocation Location = Def.getLocation(); | |
371 // Choose the macro defined latest. | |
372 if (BestLocation.isInvalid() || | |
373 (Location.isValid() && | |
374 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) { | |
375 BestLocation = Location; | |
376 BestSpelling = I->first->getName(); | |
377 } | |
378 } | |
379 return BestSpelling; | |
380 } | |
381 | |
382 void Preprocessor::recomputeCurLexerKind() { | |
383 if (CurLexer) | |
384 CurLexerKind = CLK_Lexer; | |
385 else if (CurTokenLexer) | |
386 CurLexerKind = CLK_TokenLexer; | |
387 else | |
388 CurLexerKind = CLK_CachingLexer; | |
389 } | |
390 | |
391 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, | |
392 unsigned CompleteLine, | |
393 unsigned CompleteColumn) { | |
394 assert(File); | |
395 assert(CompleteLine && CompleteColumn && "Starts from 1:1"); | |
396 assert(!CodeCompletionFile && "Already set"); | |
397 | |
398 using llvm::MemoryBuffer; | |
399 | |
400 // Load the actual file's contents. | |
401 bool Invalid = false; | |
402 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid); | |
403 if (Invalid) | |
404 return true; | |
405 | |
406 // Find the byte position of the truncation point. | |
407 const char *Position = Buffer->getBufferStart(); | |
408 for (unsigned Line = 1; Line < CompleteLine; ++Line) { | |
409 for (; *Position; ++Position) { | |
410 if (*Position != '\r' && *Position != '\n') | |
411 continue; | |
412 | |
413 // Eat \r\n or \n\r as a single line. | |
414 if ((Position[1] == '\r' || Position[1] == '\n') && | |
415 Position[0] != Position[1]) | |
416 ++Position; | |
417 ++Position; | |
418 break; | |
419 } | |
420 } | |
421 | |
422 Position += CompleteColumn - 1; | |
423 | |
424 // If pointing inside the preamble, adjust the position at the beginning of | |
425 // the file after the preamble. | |
426 if (SkipMainFilePreamble.first && | |
427 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) { | |
428 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first) | |
429 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first; | |
430 } | |
431 | |
432 if (Position > Buffer->getBufferEnd()) | |
433 Position = Buffer->getBufferEnd(); | |
434 | |
435 CodeCompletionFile = File; | |
436 CodeCompletionOffset = Position - Buffer->getBufferStart(); | |
437 | |
438 auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer( | |
439 Buffer->getBufferSize() + 1, Buffer->getBufferIdentifier()); | |
440 char *NewBuf = NewBuffer->getBufferStart(); | |
441 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf); | |
442 *NewPos = '\0'; | |
443 std::copy(Position, Buffer->getBufferEnd(), NewPos+1); | |
444 SourceMgr.overrideFileContents(File, std::move(NewBuffer)); | |
445 | |
446 return false; | |
447 } | |
448 | |
449 void Preprocessor::CodeCompleteIncludedFile(llvm::StringRef Dir, | |
450 bool IsAngled) { | |
451 if (CodeComplete) | |
452 CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled); | |
453 setCodeCompletionReached(); | |
454 } | |
455 | |
456 void Preprocessor::CodeCompleteNaturalLanguage() { | |
457 if (CodeComplete) | |
458 CodeComplete->CodeCompleteNaturalLanguage(); | |
459 setCodeCompletionReached(); | |
460 } | |
461 | |
462 /// getSpelling - This method is used to get the spelling of a token into a | |
463 /// SmallVector. Note that the returned StringRef may not point to the | |
464 /// supplied buffer if a copy can be avoided. | |
465 StringRef Preprocessor::getSpelling(const Token &Tok, | |
466 SmallVectorImpl<char> &Buffer, | |
467 bool *Invalid) const { | |
468 // NOTE: this has to be checked *before* testing for an IdentifierInfo. | |
469 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) { | |
470 // Try the fast path. | |
471 if (const IdentifierInfo *II = Tok.getIdentifierInfo()) | |
472 return II->getName(); | |
473 } | |
474 | |
475 // Resize the buffer if we need to copy into it. | |
476 if (Tok.needsCleaning()) | |
477 Buffer.resize(Tok.getLength()); | |
478 | |
479 const char *Ptr = Buffer.data(); | |
480 unsigned Len = getSpelling(Tok, Ptr, Invalid); | |
481 return StringRef(Ptr, Len); | |
482 } | |
483 | |
484 /// CreateString - Plop the specified string into a scratch buffer and return a | |
485 /// location for it. If specified, the source location provides a source | |
486 /// location for the token. | |
487 void Preprocessor::CreateString(StringRef Str, Token &Tok, | |
488 SourceLocation ExpansionLocStart, | |
489 SourceLocation ExpansionLocEnd) { | |
490 Tok.setLength(Str.size()); | |
491 | |
492 const char *DestPtr; | |
493 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr); | |
494 | |
495 if (ExpansionLocStart.isValid()) | |
496 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart, | |
497 ExpansionLocEnd, Str.size()); | |
498 Tok.setLocation(Loc); | |
499 | |
500 // If this is a raw identifier or a literal token, set the pointer data. | |
501 if (Tok.is(tok::raw_identifier)) | |
502 Tok.setRawIdentifierData(DestPtr); | |
503 else if (Tok.isLiteral()) | |
504 Tok.setLiteralData(DestPtr); | |
505 } | |
506 | |
507 SourceLocation Preprocessor::SplitToken(SourceLocation Loc, unsigned Length) { | |
508 auto &SM = getSourceManager(); | |
509 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc); | |
510 std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(SpellingLoc); | |
511 bool Invalid = false; | |
512 StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); | |
513 if (Invalid) | |
514 return SourceLocation(); | |
515 | |
516 // FIXME: We could consider re-using spelling for tokens we see repeatedly. | |
517 const char *DestPtr; | |
518 SourceLocation Spelling = | |
519 ScratchBuf->getToken(Buffer.data() + LocInfo.second, Length, DestPtr); | |
520 return SM.createTokenSplitLoc(Spelling, Loc, Loc.getLocWithOffset(Length)); | |
521 } | |
522 | |
523 Module *Preprocessor::getCurrentModule() { | |
524 if (!getLangOpts().isCompilingModule()) | |
525 return nullptr; | |
526 | |
527 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule); | |
528 } | |
529 | |
530 //===----------------------------------------------------------------------===// | |
531 // Preprocessor Initialization Methods | |
532 //===----------------------------------------------------------------------===// | |
533 | |
534 /// EnterMainSourceFile - Enter the specified FileID as the main source file, | |
535 /// which implicitly adds the builtin defines etc. | |
536 void Preprocessor::EnterMainSourceFile() { | |
537 // We do not allow the preprocessor to reenter the main file. Doing so will | |
538 // cause FileID's to accumulate information from both runs (e.g. #line | |
539 // information) and predefined macros aren't guaranteed to be set properly. | |
540 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); | |
541 FileID MainFileID = SourceMgr.getMainFileID(); | |
542 | |
543 // If MainFileID is loaded it means we loaded an AST file, no need to enter | |
544 // a main file. | |
545 if (!SourceMgr.isLoadedFileID(MainFileID)) { | |
546 // Enter the main file source buffer. | |
547 EnterSourceFile(MainFileID, nullptr, SourceLocation()); | |
548 | |
549 // If we've been asked to skip bytes in the main file (e.g., as part of a | |
550 // precompiled preamble), do so now. | |
551 if (SkipMainFilePreamble.first > 0) | |
552 CurLexer->SetByteOffset(SkipMainFilePreamble.first, | |
553 SkipMainFilePreamble.second); | |
554 | |
555 // Tell the header info that the main file was entered. If the file is later | |
556 // #imported, it won't be re-entered. | |
557 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) | |
558 HeaderInfo.IncrementIncludeCount(FE); | |
559 } | |
560 | |
561 // Preprocess Predefines to populate the initial preprocessor state. | |
562 std::unique_ptr<llvm::MemoryBuffer> SB = | |
563 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>"); | |
564 assert(SB && "Cannot create predefined source buffer"); | |
565 FileID FID = SourceMgr.createFileID(std::move(SB)); | |
566 assert(FID.isValid() && "Could not create FileID for predefines?"); | |
567 setPredefinesFileID(FID); | |
568 | |
569 // Start parsing the predefines. | |
570 EnterSourceFile(FID, nullptr, SourceLocation()); | |
571 | |
572 if (!PPOpts->PCHThroughHeader.empty()) { | |
573 // Lookup and save the FileID for the through header. If it isn't found | |
574 // in the search path, it's a fatal error. | |
575 const DirectoryLookup *CurDir; | |
576 Optional<FileEntryRef> File = LookupFile( | |
577 SourceLocation(), PPOpts->PCHThroughHeader, | |
578 /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr, CurDir, | |
579 /*SearchPath=*/nullptr, /*RelativePath=*/nullptr, | |
580 /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr, | |
581 /*IsFrameworkFound=*/nullptr); | |
582 if (!File) { | |
583 Diag(SourceLocation(), diag::err_pp_through_header_not_found) | |
584 << PPOpts->PCHThroughHeader; | |
585 return; | |
586 } | |
587 setPCHThroughHeaderFileID( | |
588 SourceMgr.createFileID(*File, SourceLocation(), SrcMgr::C_User)); | |
589 } | |
590 | |
591 // Skip tokens from the Predefines and if needed the main file. | |
592 if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) || | |
593 (usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop)) | |
594 SkipTokensWhileUsingPCH(); | |
595 } | |
596 | |
597 void Preprocessor::setPCHThroughHeaderFileID(FileID FID) { | |
598 assert(PCHThroughHeaderFileID.isInvalid() && | |
599 "PCHThroughHeaderFileID already set!"); | |
600 PCHThroughHeaderFileID = FID; | |
601 } | |
602 | |
603 bool Preprocessor::isPCHThroughHeader(const FileEntry *FE) { | |
604 assert(PCHThroughHeaderFileID.isValid() && | |
605 "Invalid PCH through header FileID"); | |
606 return FE == SourceMgr.getFileEntryForID(PCHThroughHeaderFileID); | |
607 } | |
608 | |
609 bool Preprocessor::creatingPCHWithThroughHeader() { | |
610 return TUKind == TU_Prefix && !PPOpts->PCHThroughHeader.empty() && | |
611 PCHThroughHeaderFileID.isValid(); | |
612 } | |
613 | |
614 bool Preprocessor::usingPCHWithThroughHeader() { | |
615 return TUKind != TU_Prefix && !PPOpts->PCHThroughHeader.empty() && | |
616 PCHThroughHeaderFileID.isValid(); | |
617 } | |
618 | |
619 bool Preprocessor::creatingPCHWithPragmaHdrStop() { | |
620 return TUKind == TU_Prefix && PPOpts->PCHWithHdrStop; | |
621 } | |
622 | |
623 bool Preprocessor::usingPCHWithPragmaHdrStop() { | |
624 return TUKind != TU_Prefix && PPOpts->PCHWithHdrStop; | |
625 } | |
626 | |
627 /// Skip tokens until after the #include of the through header or | |
628 /// until after a #pragma hdrstop is seen. Tokens in the predefines file | |
629 /// and the main file may be skipped. If the end of the predefines file | |
630 /// is reached, skipping continues into the main file. If the end of the | |
631 /// main file is reached, it's a fatal error. | |
632 void Preprocessor::SkipTokensWhileUsingPCH() { | |
633 bool ReachedMainFileEOF = false; | |
634 bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader; | |
635 bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop; | |
636 Token Tok; | |
637 while (true) { | |
638 bool InPredefines = | |
639 (CurLexer && CurLexer->getFileID() == getPredefinesFileID()); | |
640 switch (CurLexerKind) { | |
641 case CLK_Lexer: | |
642 CurLexer->Lex(Tok); | |
643 break; | |
644 case CLK_TokenLexer: | |
645 CurTokenLexer->Lex(Tok); | |
646 break; | |
647 case CLK_CachingLexer: | |
648 CachingLex(Tok); | |
649 break; | |
650 case CLK_LexAfterModuleImport: | |
651 LexAfterModuleImport(Tok); | |
652 break; | |
653 } | |
654 if (Tok.is(tok::eof) && !InPredefines) { | |
655 ReachedMainFileEOF = true; | |
656 break; | |
657 } | |
658 if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader) | |
659 break; | |
660 if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop) | |
661 break; | |
662 } | |
663 if (ReachedMainFileEOF) { | |
664 if (UsingPCHThroughHeader) | |
665 Diag(SourceLocation(), diag::err_pp_through_header_not_seen) | |
666 << PPOpts->PCHThroughHeader << 1; | |
667 else if (!PPOpts->PCHWithHdrStopCreate) | |
668 Diag(SourceLocation(), diag::err_pp_pragma_hdrstop_not_seen); | |
669 } | |
670 } | |
671 | |
672 void Preprocessor::replayPreambleConditionalStack() { | |
673 // Restore the conditional stack from the preamble, if there is one. | |
674 if (PreambleConditionalStack.isReplaying()) { | |
675 assert(CurPPLexer && | |
676 "CurPPLexer is null when calling replayPreambleConditionalStack."); | |
677 CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack()); | |
678 PreambleConditionalStack.doneReplaying(); | |
679 if (PreambleConditionalStack.reachedEOFWhileSkipping()) | |
680 SkipExcludedConditionalBlock( | |
681 PreambleConditionalStack.SkipInfo->HashTokenLoc, | |
682 PreambleConditionalStack.SkipInfo->IfTokenLoc, | |
683 PreambleConditionalStack.SkipInfo->FoundNonSkipPortion, | |
684 PreambleConditionalStack.SkipInfo->FoundElse, | |
685 PreambleConditionalStack.SkipInfo->ElseLoc); | |
686 } | |
687 } | |
688 | |
689 void Preprocessor::EndSourceFile() { | |
690 // Notify the client that we reached the end of the source file. | |
691 if (Callbacks) | |
692 Callbacks->EndOfMainFile(); | |
693 } | |
694 | |
695 //===----------------------------------------------------------------------===// | |
696 // Lexer Event Handling. | |
697 //===----------------------------------------------------------------------===// | |
698 | |
699 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the | |
700 /// identifier information for the token and install it into the token, | |
701 /// updating the token kind accordingly. | |
702 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { | |
703 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!"); | |
704 | |
705 // Look up this token, see if it is a macro, or if it is a language keyword. | |
706 IdentifierInfo *II; | |
707 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) { | |
708 // No cleaning needed, just use the characters from the lexed buffer. | |
709 II = getIdentifierInfo(Identifier.getRawIdentifier()); | |
710 } else { | |
711 // Cleaning needed, alloca a buffer, clean into it, then use the buffer. | |
712 SmallString<64> IdentifierBuffer; | |
713 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); | |
714 | |
715 if (Identifier.hasUCN()) { | |
716 SmallString<64> UCNIdentifierBuffer; | |
717 expandUCNs(UCNIdentifierBuffer, CleanedStr); | |
718 II = getIdentifierInfo(UCNIdentifierBuffer); | |
719 } else { | |
720 II = getIdentifierInfo(CleanedStr); | |
721 } | |
722 } | |
723 | |
724 // Update the token info (identifier info and appropriate token kind). | |
725 Identifier.setIdentifierInfo(II); | |
726 if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() && | |
727 getSourceManager().isInSystemHeader(Identifier.getLocation())) | |
728 Identifier.setKind(tok::identifier); | |
729 else | |
730 Identifier.setKind(II->getTokenID()); | |
731 | |
732 return II; | |
733 } | |
734 | |
735 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) { | |
736 PoisonReasons[II] = DiagID; | |
737 } | |
738 | |
739 void Preprocessor::PoisonSEHIdentifiers(bool Poison) { | |
740 assert(Ident__exception_code && Ident__exception_info); | |
741 assert(Ident___exception_code && Ident___exception_info); | |
742 Ident__exception_code->setIsPoisoned(Poison); | |
743 Ident___exception_code->setIsPoisoned(Poison); | |
744 Ident_GetExceptionCode->setIsPoisoned(Poison); | |
745 Ident__exception_info->setIsPoisoned(Poison); | |
746 Ident___exception_info->setIsPoisoned(Poison); | |
747 Ident_GetExceptionInfo->setIsPoisoned(Poison); | |
748 Ident__abnormal_termination->setIsPoisoned(Poison); | |
749 Ident___abnormal_termination->setIsPoisoned(Poison); | |
750 Ident_AbnormalTermination->setIsPoisoned(Poison); | |
751 } | |
752 | |
753 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { | |
754 assert(Identifier.getIdentifierInfo() && | |
755 "Can't handle identifiers without identifier info!"); | |
756 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it = | |
757 PoisonReasons.find(Identifier.getIdentifierInfo()); | |
758 if(it == PoisonReasons.end()) | |
759 Diag(Identifier, diag::err_pp_used_poisoned_id); | |
760 else | |
761 Diag(Identifier,it->second) << Identifier.getIdentifierInfo(); | |
762 } | |
763 | |
764 /// Returns a diagnostic message kind for reporting a future keyword as | |
765 /// appropriate for the identifier and specified language. | |
766 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, | |
767 const LangOptions &LangOpts) { | |
768 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed"); | |
769 | |
770 if (LangOpts.CPlusPlus) | |
771 return llvm::StringSwitch<diag::kind>(II.getName()) | |
772 #define CXX11_KEYWORD(NAME, FLAGS) \ | |
773 .Case(#NAME, diag::warn_cxx11_keyword) | |
774 #define CXX2A_KEYWORD(NAME, FLAGS) \ | |
775 .Case(#NAME, diag::warn_cxx2a_keyword) | |
776 #include "clang/Basic/TokenKinds.def" | |
777 ; | |
778 | |
779 llvm_unreachable( | |
780 "Keyword not known to come from a newer Standard or proposed Standard"); | |
781 } | |
782 | |
783 void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const { | |
784 assert(II.isOutOfDate() && "not out of date"); | |
785 getExternalSource()->updateOutOfDateIdentifier(II); | |
786 } | |
787 | |
788 /// HandleIdentifier - This callback is invoked when the lexer reads an | |
789 /// identifier. This callback looks up the identifier in the map and/or | |
790 /// potentially macro expands it or turns it into a named token (like 'for'). | |
791 /// | |
792 /// Note that callers of this method are guarded by checking the | |
793 /// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the | |
794 /// IdentifierInfo methods that compute these properties will need to change to | |
795 /// match. | |
796 bool Preprocessor::HandleIdentifier(Token &Identifier) { | |
797 assert(Identifier.getIdentifierInfo() && | |
798 "Can't handle identifiers without identifier info!"); | |
799 | |
800 IdentifierInfo &II = *Identifier.getIdentifierInfo(); | |
801 | |
802 // If the information about this identifier is out of date, update it from | |
803 // the external source. | |
804 // We have to treat __VA_ARGS__ in a special way, since it gets | |
805 // serialized with isPoisoned = true, but our preprocessor may have | |
806 // unpoisoned it if we're defining a C99 macro. | |
807 if (II.isOutOfDate()) { | |
808 bool CurrentIsPoisoned = false; | |
809 const bool IsSpecialVariadicMacro = | |
810 &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__; | |
811 if (IsSpecialVariadicMacro) | |
812 CurrentIsPoisoned = II.isPoisoned(); | |
813 | |
814 updateOutOfDateIdentifier(II); | |
815 Identifier.setKind(II.getTokenID()); | |
816 | |
817 if (IsSpecialVariadicMacro) | |
818 II.setIsPoisoned(CurrentIsPoisoned); | |
819 } | |
820 | |
821 // If this identifier was poisoned, and if it was not produced from a macro | |
822 // expansion, emit an error. | |
823 if (II.isPoisoned() && CurPPLexer) { | |
824 HandlePoisonedIdentifier(Identifier); | |
825 } | |
826 | |
827 // If this is a macro to be expanded, do it. | |
828 if (MacroDefinition MD = getMacroDefinition(&II)) { | |
829 auto *MI = MD.getMacroInfo(); | |
830 assert(MI && "macro definition with no macro info?"); | |
831 if (!DisableMacroExpansion) { | |
832 if (!Identifier.isExpandDisabled() && MI->isEnabled()) { | |
833 // C99 6.10.3p10: If the preprocessing token immediately after the | |
834 // macro name isn't a '(', this macro should not be expanded. | |
835 if (!MI->isFunctionLike() || isNextPPTokenLParen()) | |
836 return HandleMacroExpandedIdentifier(Identifier, MD); | |
837 } else { | |
838 // C99 6.10.3.4p2 says that a disabled macro may never again be | |
839 // expanded, even if it's in a context where it could be expanded in the | |
840 // future. | |
841 Identifier.setFlag(Token::DisableExpand); | |
842 if (MI->isObjectLike() || isNextPPTokenLParen()) | |
843 Diag(Identifier, diag::pp_disabled_macro_expansion); | |
844 } | |
845 } | |
846 } | |
847 | |
848 // If this identifier is a keyword in a newer Standard or proposed Standard, | |
849 // produce a warning. Don't warn if we're not considering macro expansion, | |
850 // since this identifier might be the name of a macro. | |
851 // FIXME: This warning is disabled in cases where it shouldn't be, like | |
852 // "#define constexpr constexpr", "int constexpr;" | |
853 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) { | |
854 Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts())) | |
855 << II.getName(); | |
856 // Don't diagnose this keyword again in this translation unit. | |
857 II.setIsFutureCompatKeyword(false); | |
858 } | |
859 | |
860 // If this is an extension token, diagnose its use. | |
861 // We avoid diagnosing tokens that originate from macro definitions. | |
862 // FIXME: This warning is disabled in cases where it shouldn't be, | |
863 // like "#define TY typeof", "TY(1) x". | |
864 if (II.isExtensionToken() && !DisableMacroExpansion) | |
865 Diag(Identifier, diag::ext_token_used); | |
866 | |
867 // If this is the 'import' contextual keyword following an '@', note | |
868 // that the next token indicates a module name. | |
869 // | |
870 // Note that we do not treat 'import' as a contextual | |
871 // keyword when we're in a caching lexer, because caching lexers only get | |
872 // used in contexts where import declarations are disallowed. | |
873 // | |
874 // Likewise if this is the C++ Modules TS import keyword. | |
875 if (((LastTokenWasAt && II.isModulesImport()) || | |
876 Identifier.is(tok::kw_import)) && | |
877 !InMacroArgs && !DisableMacroExpansion && | |
878 (getLangOpts().Modules || getLangOpts().DebuggerSupport) && | |
879 CurLexerKind != CLK_CachingLexer) { | |
880 ModuleImportLoc = Identifier.getLocation(); | |
881 ModuleImportPath.clear(); | |
882 ModuleImportExpectsIdentifier = true; | |
883 CurLexerKind = CLK_LexAfterModuleImport; | |
884 } | |
885 return true; | |
886 } | |
887 | |
888 void Preprocessor::Lex(Token &Result) { | |
889 ++LexLevel; | |
890 | |
891 // We loop here until a lex function returns a token; this avoids recursion. | |
892 bool ReturnedToken; | |
893 do { | |
894 switch (CurLexerKind) { | |
895 case CLK_Lexer: | |
896 ReturnedToken = CurLexer->Lex(Result); | |
897 break; | |
898 case CLK_TokenLexer: | |
899 ReturnedToken = CurTokenLexer->Lex(Result); | |
900 break; | |
901 case CLK_CachingLexer: | |
902 CachingLex(Result); | |
903 ReturnedToken = true; | |
904 break; | |
905 case CLK_LexAfterModuleImport: | |
906 ReturnedToken = LexAfterModuleImport(Result); | |
907 break; | |
908 } | |
909 } while (!ReturnedToken); | |
910 | |
911 if (Result.is(tok::unknown) && TheModuleLoader.HadFatalFailure) | |
912 return; | |
913 | |
914 if (Result.is(tok::code_completion) && Result.getIdentifierInfo()) { | |
915 // Remember the identifier before code completion token. | |
916 setCodeCompletionIdentifierInfo(Result.getIdentifierInfo()); | |
917 setCodeCompletionTokenRange(Result.getLocation(), Result.getEndLoc()); | |
918 // Set IdenfitierInfo to null to avoid confusing code that handles both | |
919 // identifiers and completion tokens. | |
920 Result.setIdentifierInfo(nullptr); | |
921 } | |
922 | |
923 // Update ImportSeqState to track our position within a C++20 import-seq | |
924 // if this token is being produced as a result of phase 4 of translation. | |
925 if (getLangOpts().CPlusPlusModules && LexLevel == 1 && | |
926 !Result.getFlag(Token::IsReinjected)) { | |
927 switch (Result.getKind()) { | |
928 case tok::l_paren: case tok::l_square: case tok::l_brace: | |
929 ImportSeqState.handleOpenBracket(); | |
930 break; | |
931 case tok::r_paren: case tok::r_square: | |
932 ImportSeqState.handleCloseBracket(); | |
933 break; | |
934 case tok::r_brace: | |
935 ImportSeqState.handleCloseBrace(); | |
936 break; | |
937 case tok::semi: | |
938 ImportSeqState.handleSemi(); | |
939 break; | |
940 case tok::header_name: | |
941 case tok::annot_header_unit: | |
942 ImportSeqState.handleHeaderName(); | |
943 break; | |
944 case tok::kw_export: | |
945 ImportSeqState.handleExport(); | |
946 break; | |
947 case tok::identifier: | |
948 if (Result.getIdentifierInfo()->isModulesImport()) { | |
949 ImportSeqState.handleImport(); | |
950 if (ImportSeqState.afterImportSeq()) { | |
951 ModuleImportLoc = Result.getLocation(); | |
952 ModuleImportPath.clear(); | |
953 ModuleImportExpectsIdentifier = true; | |
954 CurLexerKind = CLK_LexAfterModuleImport; | |
955 } | |
956 break; | |
957 } | |
958 LLVM_FALLTHROUGH; | |
959 default: | |
960 ImportSeqState.handleMisc(); | |
961 break; | |
962 } | |
963 } | |
964 | |
965 LastTokenWasAt = Result.is(tok::at); | |
966 --LexLevel; | |
967 | |
968 if (LexLevel == 0 && !Result.getFlag(Token::IsReinjected)) { | |
969 ++TokenCount; | |
970 if (OnToken) | |
971 OnToken(Result); | |
972 } | |
973 } | |
974 | |
975 /// Lex a header-name token (including one formed from header-name-tokens if | |
976 /// \p AllowConcatenation is \c true). | |
977 /// | |
978 /// \param FilenameTok Filled in with the next token. On success, this will | |
979 /// be either a header_name token. On failure, it will be whatever other | |
980 /// token was found instead. | |
981 /// \param AllowMacroExpansion If \c true, allow the header name to be formed | |
982 /// by macro expansion (concatenating tokens as necessary if the first | |
983 /// token is a '<'). | |
984 /// \return \c true if we reached EOD or EOF while looking for a > token in | |
985 /// a concatenated header name and diagnosed it. \c false otherwise. | |
986 bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) { | |
987 // Lex using header-name tokenization rules if tokens are being lexed from | |
988 // a file. Just grab a token normally if we're in a macro expansion. | |
989 if (CurPPLexer) | |
990 CurPPLexer->LexIncludeFilename(FilenameTok); | |
991 else | |
992 Lex(FilenameTok); | |
993 | |
994 // This could be a <foo/bar.h> file coming from a macro expansion. In this | |
995 // case, glue the tokens together into an angle_string_literal token. | |
996 SmallString<128> FilenameBuffer; | |
997 if (FilenameTok.is(tok::less) && AllowMacroExpansion) { | |
998 bool StartOfLine = FilenameTok.isAtStartOfLine(); | |
999 bool LeadingSpace = FilenameTok.hasLeadingSpace(); | |
1000 bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro(); | |
1001 | |
1002 SourceLocation Start = FilenameTok.getLocation(); | |
1003 SourceLocation End; | |
1004 FilenameBuffer.push_back('<'); | |
1005 | |
1006 // Consume tokens until we find a '>'. | |
1007 // FIXME: A header-name could be formed starting or ending with an | |
1008 // alternative token. It's not clear whether that's ill-formed in all | |
1009 // cases. | |
1010 while (FilenameTok.isNot(tok::greater)) { | |
1011 Lex(FilenameTok); | |
1012 if (FilenameTok.isOneOf(tok::eod, tok::eof)) { | |
1013 Diag(FilenameTok.getLocation(), diag::err_expected) << tok::greater; | |
1014 Diag(Start, diag::note_matching) << tok::less; | |
1015 return true; | |
1016 } | |
1017 | |
1018 End = FilenameTok.getLocation(); | |
1019 | |
1020 // FIXME: Provide code completion for #includes. | |
1021 if (FilenameTok.is(tok::code_completion)) { | |
1022 setCodeCompletionReached(); | |
1023 Lex(FilenameTok); | |
1024 continue; | |
1025 } | |
1026 | |
1027 // Append the spelling of this token to the buffer. If there was a space | |
1028 // before it, add it now. | |
1029 if (FilenameTok.hasLeadingSpace()) | |
1030 FilenameBuffer.push_back(' '); | |
1031 | |
1032 // Get the spelling of the token, directly into FilenameBuffer if | |
1033 // possible. | |
1034 size_t PreAppendSize = FilenameBuffer.size(); | |
1035 FilenameBuffer.resize(PreAppendSize + FilenameTok.getLength()); | |
1036 | |
1037 const char *BufPtr = &FilenameBuffer[PreAppendSize]; | |
1038 unsigned ActualLen = getSpelling(FilenameTok, BufPtr); | |
1039 | |
1040 // If the token was spelled somewhere else, copy it into FilenameBuffer. | |
1041 if (BufPtr != &FilenameBuffer[PreAppendSize]) | |
1042 memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen); | |
1043 | |
1044 // Resize FilenameBuffer to the correct size. | |
1045 if (FilenameTok.getLength() != ActualLen) | |
1046 FilenameBuffer.resize(PreAppendSize + ActualLen); | |
1047 } | |
1048 | |
1049 FilenameTok.startToken(); | |
1050 FilenameTok.setKind(tok::header_name); | |
1051 FilenameTok.setFlagValue(Token::StartOfLine, StartOfLine); | |
1052 FilenameTok.setFlagValue(Token::LeadingSpace, LeadingSpace); | |
1053 FilenameTok.setFlagValue(Token::LeadingEmptyMacro, LeadingEmptyMacro); | |
1054 CreateString(FilenameBuffer, FilenameTok, Start, End); | |
1055 } else if (FilenameTok.is(tok::string_literal) && AllowMacroExpansion) { | |
1056 // Convert a string-literal token of the form " h-char-sequence " | |
1057 // (produced by macro expansion) into a header-name token. | |
1058 // | |
1059 // The rules for header-names don't quite match the rules for | |
1060 // string-literals, but all the places where they differ result in | |
1061 // undefined behavior, so we can and do treat them the same. | |
1062 // | |
1063 // A string-literal with a prefix or suffix is not translated into a | |
1064 // header-name. This could theoretically be observable via the C++20 | |
1065 // context-sensitive header-name formation rules. | |
1066 StringRef Str = getSpelling(FilenameTok, FilenameBuffer); | |
1067 if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"') | |
1068 FilenameTok.setKind(tok::header_name); | |
1069 } | |
1070 | |
1071 return false; | |
1072 } | |
1073 | |
1074 /// Collect the tokens of a C++20 pp-import-suffix. | |
1075 void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) { | |
1076 // FIXME: For error recovery, consider recognizing attribute syntax here | |
1077 // and terminating / diagnosing a missing semicolon if we find anything | |
1078 // else? (Can we leave that to the parser?) | |
1079 unsigned BracketDepth = 0; | |
1080 while (true) { | |
1081 Toks.emplace_back(); | |
1082 Lex(Toks.back()); | |
1083 | |
1084 switch (Toks.back().getKind()) { | |
1085 case tok::l_paren: case tok::l_square: case tok::l_brace: | |
1086 ++BracketDepth; | |
1087 break; | |
1088 | |
1089 case tok::r_paren: case tok::r_square: case tok::r_brace: | |
1090 if (BracketDepth == 0) | |
1091 return; | |
1092 --BracketDepth; | |
1093 break; | |
1094 | |
1095 case tok::semi: | |
1096 if (BracketDepth == 0) | |
1097 return; | |
1098 break; | |
1099 | |
1100 case tok::eof: | |
1101 return; | |
1102 | |
1103 default: | |
1104 break; | |
1105 } | |
1106 } | |
1107 } | |
1108 | |
1109 | |
1110 /// Lex a token following the 'import' contextual keyword. | |
1111 /// | |
1112 /// pp-import: [C++20] | |
1113 /// import header-name pp-import-suffix[opt] ; | |
1114 /// import header-name-tokens pp-import-suffix[opt] ; | |
1115 /// [ObjC] @ import module-name ; | |
1116 /// [Clang] import module-name ; | |
1117 /// | |
1118 /// header-name-tokens: | |
1119 /// string-literal | |
1120 /// < [any sequence of preprocessing-tokens other than >] > | |
1121 /// | |
1122 /// module-name: | |
1123 /// module-name-qualifier[opt] identifier | |
1124 /// | |
1125 /// module-name-qualifier | |
1126 /// module-name-qualifier[opt] identifier . | |
1127 /// | |
1128 /// We respond to a pp-import by importing macros from the named module. | |
1129 bool Preprocessor::LexAfterModuleImport(Token &Result) { | |
1130 // Figure out what kind of lexer we actually have. | |
1131 recomputeCurLexerKind(); | |
1132 | |
1133 // Lex the next token. The header-name lexing rules are used at the start of | |
1134 // a pp-import. | |
1135 // | |
1136 // For now, we only support header-name imports in C++20 mode. | |
1137 // FIXME: Should we allow this in all language modes that support an import | |
1138 // declaration as an extension? | |
1139 if (ModuleImportPath.empty() && getLangOpts().CPlusPlusModules) { | |
1140 if (LexHeaderName(Result)) | |
1141 return true; | |
1142 } else { | |
1143 Lex(Result); | |
1144 } | |
1145 | |
1146 // Allocate a holding buffer for a sequence of tokens and introduce it into | |
1147 // the token stream. | |
1148 auto EnterTokens = [this](ArrayRef<Token> Toks) { | |
1149 auto ToksCopy = std::make_unique<Token[]>(Toks.size()); | |
1150 std::copy(Toks.begin(), Toks.end(), ToksCopy.get()); | |
1151 EnterTokenStream(std::move(ToksCopy), Toks.size(), | |
1152 /*DisableMacroExpansion*/ true, /*IsReinject*/ false); | |
1153 }; | |
1154 | |
1155 // Check for a header-name. | |
1156 SmallVector<Token, 32> Suffix; | |
1157 if (Result.is(tok::header_name)) { | |
1158 // Enter the header-name token into the token stream; a Lex action cannot | |
1159 // both return a token and cache tokens (doing so would corrupt the token | |
1160 // cache if the call to Lex comes from CachingLex / PeekAhead). | |
1161 Suffix.push_back(Result); | |
1162 | |
1163 // Consume the pp-import-suffix and expand any macros in it now. We'll add | |
1164 // it back into the token stream later. | |
1165 CollectPpImportSuffix(Suffix); | |
1166 if (Suffix.back().isNot(tok::semi)) { | |
1167 // This is not a pp-import after all. | |
1168 EnterTokens(Suffix); | |
1169 return false; | |
1170 } | |
1171 | |
1172 // C++2a [cpp.module]p1: | |
1173 // The ';' preprocessing-token terminating a pp-import shall not have | |
1174 // been produced by macro replacement. | |
1175 SourceLocation SemiLoc = Suffix.back().getLocation(); | |
1176 if (SemiLoc.isMacroID()) | |
1177 Diag(SemiLoc, diag::err_header_import_semi_in_macro); | |
1178 | |
1179 // Reconstitute the import token. | |
1180 Token ImportTok; | |
1181 ImportTok.startToken(); | |
1182 ImportTok.setKind(tok::kw_import); | |
1183 ImportTok.setLocation(ModuleImportLoc); | |
1184 ImportTok.setIdentifierInfo(getIdentifierInfo("import")); | |
1185 ImportTok.setLength(6); | |
1186 | |
1187 auto Action = HandleHeaderIncludeOrImport( | |
1188 /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc); | |
1189 switch (Action.Kind) { | |
1190 case ImportAction::None: | |
1191 break; | |
1192 | |
1193 case ImportAction::ModuleBegin: | |
1194 // Let the parser know we're textually entering the module. | |
1195 Suffix.emplace_back(); | |
1196 Suffix.back().startToken(); | |
1197 Suffix.back().setKind(tok::annot_module_begin); | |
1198 Suffix.back().setLocation(SemiLoc); | |
1199 Suffix.back().setAnnotationEndLoc(SemiLoc); | |
1200 Suffix.back().setAnnotationValue(Action.ModuleForHeader); | |
1201 LLVM_FALLTHROUGH; | |
1202 | |
1203 case ImportAction::ModuleImport: | |
1204 case ImportAction::SkippedModuleImport: | |
1205 // We chose to import (or textually enter) the file. Convert the | |
1206 // header-name token into a header unit annotation token. | |
1207 Suffix[0].setKind(tok::annot_header_unit); | |
1208 Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation()); | |
1209 Suffix[0].setAnnotationValue(Action.ModuleForHeader); | |
1210 // FIXME: Call the moduleImport callback? | |
1211 break; | |
1212 case ImportAction::Failure: | |
1213 assert(TheModuleLoader.HadFatalFailure && | |
1214 "This should be an early exit only to a fatal error"); | |
1215 Result.setKind(tok::eof); | |
1216 CurLexer->cutOffLexing(); | |
1217 EnterTokens(Suffix); | |
1218 return true; | |
1219 } | |
1220 | |
1221 EnterTokens(Suffix); | |
1222 return false; | |
1223 } | |
1224 | |
1225 // The token sequence | |
1226 // | |
1227 // import identifier (. identifier)* | |
1228 // | |
1229 // indicates a module import directive. We already saw the 'import' | |
1230 // contextual keyword, so now we're looking for the identifiers. | |
1231 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { | |
1232 // We expected to see an identifier here, and we did; continue handling | |
1233 // identifiers. | |
1234 ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(), | |
1235 Result.getLocation())); | |
1236 ModuleImportExpectsIdentifier = false; | |
1237 CurLexerKind = CLK_LexAfterModuleImport; | |
1238 return true; | |
1239 } | |
1240 | |
1241 // If we're expecting a '.' or a ';', and we got a '.', then wait until we | |
1242 // see the next identifier. (We can also see a '[[' that begins an | |
1243 // attribute-specifier-seq here under the C++ Modules TS.) | |
1244 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { | |
1245 ModuleImportExpectsIdentifier = true; | |
1246 CurLexerKind = CLK_LexAfterModuleImport; | |
1247 return true; | |
1248 } | |
1249 | |
1250 // If we didn't recognize a module name at all, this is not a (valid) import. | |
1251 if (ModuleImportPath.empty() || Result.is(tok::eof)) | |
1252 return true; | |
1253 | |
1254 // Consume the pp-import-suffix and expand any macros in it now, if we're not | |
1255 // at the semicolon already. | |
1256 SourceLocation SemiLoc = Result.getLocation(); | |
1257 if (Result.isNot(tok::semi)) { | |
1258 Suffix.push_back(Result); | |
1259 CollectPpImportSuffix(Suffix); | |
1260 if (Suffix.back().isNot(tok::semi)) { | |
1261 // This is not an import after all. | |
1262 EnterTokens(Suffix); | |
1263 return false; | |
1264 } | |
1265 SemiLoc = Suffix.back().getLocation(); | |
1266 } | |
1267 | |
1268 // Under the Modules TS, the dot is just part of the module name, and not | |
1269 // a real hierarchy separator. Flatten such module names now. | |
1270 // | |
1271 // FIXME: Is this the right level to be performing this transformation? | |
1272 std::string FlatModuleName; | |
1273 if (getLangOpts().ModulesTS || getLangOpts().CPlusPlusModules) { | |
1274 for (auto &Piece : ModuleImportPath) { | |
1275 if (!FlatModuleName.empty()) | |
1276 FlatModuleName += "."; | |
1277 FlatModuleName += Piece.first->getName(); | |
1278 } | |
1279 SourceLocation FirstPathLoc = ModuleImportPath[0].second; | |
1280 ModuleImportPath.clear(); | |
1281 ModuleImportPath.push_back( | |
1282 std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc)); | |
1283 } | |
1284 | |
1285 Module *Imported = nullptr; | |
1286 if (getLangOpts().Modules) { | |
1287 Imported = TheModuleLoader.loadModule(ModuleImportLoc, | |
1288 ModuleImportPath, | |
1289 Module::Hidden, | |
1290 /*IsInclusionDirective=*/false); | |
1291 if (Imported) | |
1292 makeModuleVisible(Imported, SemiLoc); | |
1293 } | |
1294 if (Callbacks) | |
1295 Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported); | |
1296 | |
1297 if (!Suffix.empty()) { | |
1298 EnterTokens(Suffix); | |
1299 return false; | |
1300 } | |
1301 return true; | |
1302 } | |
1303 | |
1304 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) { | |
1305 CurSubmoduleState->VisibleModules.setVisible( | |
1306 M, Loc, [](Module *) {}, | |
1307 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) { | |
1308 // FIXME: Include the path in the diagnostic. | |
1309 // FIXME: Include the import location for the conflicting module. | |
1310 Diag(ModuleImportLoc, diag::warn_module_conflict) | |
1311 << Path[0]->getFullModuleName() | |
1312 << Conflict->getFullModuleName() | |
1313 << Message; | |
1314 }); | |
1315 | |
1316 // Add this module to the imports list of the currently-built submodule. | |
1317 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M) | |
1318 BuildingSubmoduleStack.back().M->Imports.insert(M); | |
1319 } | |
1320 | |
1321 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String, | |
1322 const char *DiagnosticTag, | |
1323 bool AllowMacroExpansion) { | |
1324 // We need at least one string literal. | |
1325 if (Result.isNot(tok::string_literal)) { | |
1326 Diag(Result, diag::err_expected_string_literal) | |
1327 << /*Source='in...'*/0 << DiagnosticTag; | |
1328 return false; | |
1329 } | |
1330 | |
1331 // Lex string literal tokens, optionally with macro expansion. | |
1332 SmallVector<Token, 4> StrToks; | |
1333 do { | |
1334 StrToks.push_back(Result); | |
1335 | |
1336 if (Result.hasUDSuffix()) | |
1337 Diag(Result, diag::err_invalid_string_udl); | |
1338 | |
1339 if (AllowMacroExpansion) | |
1340 Lex(Result); | |
1341 else | |
1342 LexUnexpandedToken(Result); | |
1343 } while (Result.is(tok::string_literal)); | |
1344 | |
1345 // Concatenate and parse the strings. | |
1346 StringLiteralParser Literal(StrToks, *this); | |
1347 assert(Literal.isAscii() && "Didn't allow wide strings in"); | |
1348 | |
1349 if (Literal.hadError) | |
1350 return false; | |
1351 | |
1352 if (Literal.Pascal) { | |
1353 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal) | |
1354 << /*Source='in...'*/0 << DiagnosticTag; | |
1355 return false; | |
1356 } | |
1357 | |
1358 String = std::string(Literal.GetString()); | |
1359 return true; | |
1360 } | |
1361 | |
1362 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) { | |
1363 assert(Tok.is(tok::numeric_constant)); | |
1364 SmallString<8> IntegerBuffer; | |
1365 bool NumberInvalid = false; | |
1366 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid); | |
1367 if (NumberInvalid) | |
1368 return false; | |
1369 NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this); | |
1370 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix()) | |
1371 return false; | |
1372 llvm::APInt APVal(64, 0); | |
1373 if (Literal.GetIntegerValue(APVal)) | |
1374 return false; | |
1375 Lex(Tok); | |
1376 Value = APVal.getLimitedValue(); | |
1377 return true; | |
1378 } | |
1379 | |
1380 void Preprocessor::addCommentHandler(CommentHandler *Handler) { | |
1381 assert(Handler && "NULL comment handler"); | |
1382 assert(llvm::find(CommentHandlers, Handler) == CommentHandlers.end() && | |
1383 "Comment handler already registered"); | |
1384 CommentHandlers.push_back(Handler); | |
1385 } | |
1386 | |
1387 void Preprocessor::removeCommentHandler(CommentHandler *Handler) { | |
1388 std::vector<CommentHandler *>::iterator Pos = | |
1389 llvm::find(CommentHandlers, Handler); | |
1390 assert(Pos != CommentHandlers.end() && "Comment handler not registered"); | |
1391 CommentHandlers.erase(Pos); | |
1392 } | |
1393 | |
1394 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) { | |
1395 bool AnyPendingTokens = false; | |
1396 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(), | |
1397 HEnd = CommentHandlers.end(); | |
1398 H != HEnd; ++H) { | |
1399 if ((*H)->HandleComment(*this, Comment)) | |
1400 AnyPendingTokens = true; | |
1401 } | |
1402 if (!AnyPendingTokens || getCommentRetentionState()) | |
1403 return false; | |
1404 Lex(result); | |
1405 return true; | |
1406 } | |
1407 | |
1408 ModuleLoader::~ModuleLoader() = default; | |
1409 | |
1410 CommentHandler::~CommentHandler() = default; | |
1411 | |
1412 CodeCompletionHandler::~CodeCompletionHandler() = default; | |
1413 | |
1414 void Preprocessor::createPreprocessingRecord() { | |
1415 if (Record) | |
1416 return; | |
1417 | |
1418 Record = new PreprocessingRecord(getSourceManager()); | |
1419 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record)); | |
1420 } |