Mercurial > hg > CbC > CbC_llvm
annotate clang/lib/Lex/Preprocessor.cpp @ 209:dd44ba33042e
merged...
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 08 Jun 2021 06:36:09 +0900 |
parents | 2e18cbf3894f b7591485f4cd |
children | 50b3abffaea6 |
rev | line source |
---|---|
150 | 1 //===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===// |
2 // | |
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |
4 // See https://llvm.org/LICENSE.txt for license information. | |
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |
6 // | |
7 //===----------------------------------------------------------------------===// | |
8 // | |
9 // This file implements the Preprocessor interface. | |
10 // | |
11 //===----------------------------------------------------------------------===// | |
12 // | |
13 // Options to support: | |
14 // -H - Print the name of each header file used. | |
15 // -d[DNI] - Dump various things. | |
16 // -fworking-directory - #line's with preprocessor's working dir. | |
17 // -fpreprocessed | |
18 // -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD | |
19 // -W* | |
20 // -w | |
21 // | |
22 // Messages to emit: | |
23 // "Multiple include guards may be useful for:\n" | |
24 // | |
25 //===----------------------------------------------------------------------===// | |
26 | |
27 #include "clang/Lex/Preprocessor.h" | |
28 #include "clang/Basic/Builtins.h" | |
29 #include "clang/Basic/FileManager.h" | |
30 #include "clang/Basic/FileSystemStatCache.h" | |
31 #include "clang/Basic/IdentifierTable.h" | |
32 #include "clang/Basic/LLVM.h" | |
33 #include "clang/Basic/LangOptions.h" | |
34 #include "clang/Basic/Module.h" | |
35 #include "clang/Basic/SourceLocation.h" | |
36 #include "clang/Basic/SourceManager.h" | |
37 #include "clang/Basic/TargetInfo.h" | |
38 #include "clang/Lex/CodeCompletionHandler.h" | |
39 #include "clang/Lex/ExternalPreprocessorSource.h" | |
40 #include "clang/Lex/HeaderSearch.h" | |
41 #include "clang/Lex/LexDiagnostic.h" | |
42 #include "clang/Lex/Lexer.h" | |
43 #include "clang/Lex/LiteralSupport.h" | |
44 #include "clang/Lex/MacroArgs.h" | |
45 #include "clang/Lex/MacroInfo.h" | |
46 #include "clang/Lex/ModuleLoader.h" | |
47 #include "clang/Lex/Pragma.h" | |
48 #include "clang/Lex/PreprocessingRecord.h" | |
49 #include "clang/Lex/PreprocessorLexer.h" | |
50 #include "clang/Lex/PreprocessorOptions.h" | |
51 #include "clang/Lex/ScratchBuffer.h" | |
52 #include "clang/Lex/Token.h" | |
53 #include "clang/Lex/TokenLexer.h" | |
54 #include "llvm/ADT/APInt.h" | |
55 #include "llvm/ADT/ArrayRef.h" | |
56 #include "llvm/ADT/DenseMap.h" | |
57 #include "llvm/ADT/STLExtras.h" | |
58 #include "llvm/ADT/SmallString.h" | |
59 #include "llvm/ADT/SmallVector.h" | |
60 #include "llvm/ADT/StringRef.h" | |
61 #include "llvm/ADT/StringSwitch.h" | |
62 #include "llvm/Support/Capacity.h" | |
63 #include "llvm/Support/ErrorHandling.h" | |
64 #include "llvm/Support/MemoryBuffer.h" | |
65 #include "llvm/Support/raw_ostream.h" | |
66 #include <algorithm> | |
67 #include <cassert> | |
68 #include <memory> | |
69 #include <string> | |
70 #include <utility> | |
71 #include <vector> | |
72 | |
73 using namespace clang; | |
74 | |
75 LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry) | |
76 | |
77 ExternalPreprocessorSource::~ExternalPreprocessorSource() = default; | |
78 | |
79 Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts, | |
80 DiagnosticsEngine &diags, LangOptions &opts, | |
81 SourceManager &SM, HeaderSearch &Headers, | |
82 ModuleLoader &TheModuleLoader, | |
83 IdentifierInfoLookup *IILookup, bool OwnsHeaders, | |
84 TranslationUnitKind TUKind) | |
85 : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), | |
86 FileMgr(Headers.getFileMgr()), SourceMgr(SM), | |
87 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers), | |
88 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr), | |
89 // As the language options may have not been loaded yet (when | |
90 // deserializing an ASTUnit), adding keywords to the identifier table is | |
91 // deferred to Preprocessor::Initialize(). | |
92 Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())), | |
93 TUKind(TUKind), SkipMainFilePreamble(0, true), | |
94 CurSubmoduleState(&NullSubmoduleState) { | |
95 OwnsHeaderSearch = OwnsHeaders; | |
96 | |
97 // Default to discarding comments. | |
98 KeepComments = false; | |
99 KeepMacroComments = false; | |
100 SuppressIncludeNotFoundError = false; | |
101 | |
102 // Macro expansion is enabled. | |
103 DisableMacroExpansion = false; | |
104 MacroExpansionInDirectivesOverride = false; | |
105 InMacroArgs = false; | |
106 ArgMacro = nullptr; | |
107 InMacroArgPreExpansion = false; | |
108 NumCachedTokenLexers = 0; | |
109 PragmasEnabled = true; | |
110 ParsingIfOrElifDirective = false; | |
111 PreprocessedOutput = false; | |
112 | |
113 // We haven't read anything from the external source. | |
114 ReadMacrosFromExternalSource = false; | |
115 | |
116 BuiltinInfo = std::make_unique<Builtin::Context>(); | |
117 | |
118 // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of | |
119 // a macro. They get unpoisoned where it is allowed. | |
120 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); | |
121 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); | |
207 | 122 (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned(); |
123 SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use); | |
150 | 124 |
125 // Initialize the pragma handlers. | |
126 RegisterBuiltinPragmas(); | |
127 | |
128 // Initialize builtin macros like __LINE__ and friends. | |
129 RegisterBuiltinMacros(); | |
130 | |
131 if(LangOpts.Borland) { | |
132 Ident__exception_info = getIdentifierInfo("_exception_info"); | |
133 Ident___exception_info = getIdentifierInfo("__exception_info"); | |
134 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation"); | |
135 Ident__exception_code = getIdentifierInfo("_exception_code"); | |
136 Ident___exception_code = getIdentifierInfo("__exception_code"); | |
137 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode"); | |
138 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination"); | |
139 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination"); | |
140 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination"); | |
141 } else { | |
142 Ident__exception_info = Ident__exception_code = nullptr; | |
143 Ident__abnormal_termination = Ident___exception_info = nullptr; | |
144 Ident___exception_code = Ident___abnormal_termination = nullptr; | |
145 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr; | |
146 Ident_AbnormalTermination = nullptr; | |
147 } | |
148 | |
152 | 149 |
150 | 150 // If using a PCH where a #pragma hdrstop is expected, start skipping tokens. |
151 if (usingPCHWithPragmaHdrStop()) | |
152 SkippingUntilPragmaHdrStop = true; | |
153 | |
154 // If using a PCH with a through header, start skipping tokens. | |
155 if (!this->PPOpts->PCHThroughHeader.empty() && | |
156 !this->PPOpts->ImplicitPCHInclude.empty()) | |
157 SkippingUntilPCHThroughHeader = true; | |
158 | |
152 | 159 #ifndef noCbC |
160 SavedDepth = 0; | |
161 SavedTokenFlag = false; | |
162 #endif | |
163 | |
150 | 164 if (this->PPOpts->GeneratePreamble) |
165 PreambleConditionalStack.startRecording(); | |
166 | |
167 ExcludedConditionalDirectiveSkipMappings = | |
168 this->PPOpts->ExcludedConditionalDirectiveSkipMappings; | |
169 if (ExcludedConditionalDirectiveSkipMappings) | |
170 ExcludedConditionalDirectiveSkipMappings->clear(); | |
171 | |
172 MaxTokens = LangOpts.MaxTokens; | |
173 } | |
174 | |
175 Preprocessor::~Preprocessor() { | |
176 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); | |
177 | |
178 IncludeMacroStack.clear(); | |
179 | |
180 // Destroy any macro definitions. | |
181 while (MacroInfoChain *I = MIChainHead) { | |
182 MIChainHead = I->Next; | |
183 I->~MacroInfoChain(); | |
184 } | |
185 | |
186 // Free any cached macro expanders. | |
187 // This populates MacroArgCache, so all TokenLexers need to be destroyed | |
188 // before the code below that frees up the MacroArgCache list. | |
189 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr); | |
190 CurTokenLexer.reset(); | |
191 | |
192 // Free any cached MacroArgs. | |
193 for (MacroArgs *ArgList = MacroArgCache; ArgList;) | |
194 ArgList = ArgList->deallocate(); | |
195 | |
196 // Delete the header search info, if we own it. | |
197 if (OwnsHeaderSearch) | |
198 delete &HeaderInfo; | |
199 } | |
200 | |
201 void Preprocessor::Initialize(const TargetInfo &Target, | |
202 const TargetInfo *AuxTarget) { | |
203 assert((!this->Target || this->Target == &Target) && | |
204 "Invalid override of target information"); | |
205 this->Target = &Target; | |
206 | |
207 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) && | |
208 "Invalid override of aux target information."); | |
209 this->AuxTarget = AuxTarget; | |
210 | |
211 // Initialize information about built-ins. | |
212 BuiltinInfo->InitializeTarget(Target, AuxTarget); | |
213 HeaderInfo.setTarget(Target); | |
214 | |
215 // Populate the identifier table with info about keywords for the current language. | |
216 Identifiers.AddKeywords(LangOpts); | |
217 } | |
218 | |
219 void Preprocessor::InitializeForModelFile() { | |
220 NumEnteredSourceFiles = 0; | |
221 | |
222 // Reset pragmas | |
223 PragmaHandlersBackup = std::move(PragmaHandlers); | |
224 PragmaHandlers = std::make_unique<PragmaNamespace>(StringRef()); | |
225 RegisterBuiltinPragmas(); | |
226 | |
227 // Reset PredefinesFileID | |
228 PredefinesFileID = FileID(); | |
229 } | |
230 | |
231 void Preprocessor::FinalizeForModelFile() { | |
232 NumEnteredSourceFiles = 1; | |
233 | |
234 PragmaHandlers = std::move(PragmaHandlersBackup); | |
235 } | |
236 | |
237 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { | |
238 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '" | |
239 << getSpelling(Tok) << "'"; | |
240 | |
241 if (!DumpFlags) return; | |
242 | |
243 llvm::errs() << "\t"; | |
244 if (Tok.isAtStartOfLine()) | |
245 llvm::errs() << " [StartOfLine]"; | |
246 if (Tok.hasLeadingSpace()) | |
247 llvm::errs() << " [LeadingSpace]"; | |
248 if (Tok.isExpandDisabled()) | |
249 llvm::errs() << " [ExpandDisabled]"; | |
250 if (Tok.needsCleaning()) { | |
251 const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); | |
252 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) | |
253 << "']"; | |
254 } | |
255 | |
256 llvm::errs() << "\tLoc=<"; | |
257 DumpLocation(Tok.getLocation()); | |
258 llvm::errs() << ">"; | |
259 } | |
260 | |
261 void Preprocessor::DumpLocation(SourceLocation Loc) const { | |
262 Loc.print(llvm::errs(), SourceMgr); | |
263 } | |
264 | |
265 void Preprocessor::DumpMacro(const MacroInfo &MI) const { | |
266 llvm::errs() << "MACRO: "; | |
267 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { | |
268 DumpToken(MI.getReplacementToken(i)); | |
269 llvm::errs() << " "; | |
270 } | |
271 llvm::errs() << "\n"; | |
272 } | |
273 | |
274 void Preprocessor::PrintStats() { | |
275 llvm::errs() << "\n*** Preprocessor Stats:\n"; | |
276 llvm::errs() << NumDirectives << " directives found:\n"; | |
277 llvm::errs() << " " << NumDefined << " #define.\n"; | |
278 llvm::errs() << " " << NumUndefined << " #undef.\n"; | |
279 llvm::errs() << " #include/#include_next/#import:\n"; | |
280 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n"; | |
281 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n"; | |
282 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n"; | |
207 | 283 llvm::errs() << " " << NumElse << " #else/#elif/#elifdef/#elifndef.\n"; |
150 | 284 llvm::errs() << " " << NumEndif << " #endif.\n"; |
285 llvm::errs() << " " << NumPragma << " #pragma.\n"; | |
286 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; | |
287 | |
288 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" | |
289 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " | |
290 << NumFastMacroExpanded << " on the fast path.\n"; | |
291 llvm::errs() << (NumFastTokenPaste+NumTokenPaste) | |
292 << " token paste (##) operations performed, " | |
293 << NumFastTokenPaste << " on the fast path.\n"; | |
294 | |
295 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total"; | |
296 | |
297 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory(); | |
298 llvm::errs() << "\n Macro Expanded Tokens: " | |
299 << llvm::capacity_in_bytes(MacroExpandedTokens); | |
300 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity(); | |
301 // FIXME: List information for all submodules. | |
302 llvm::errs() << "\n Macros: " | |
303 << llvm::capacity_in_bytes(CurSubmoduleState->Macros); | |
304 llvm::errs() << "\n #pragma push_macro Info: " | |
305 << llvm::capacity_in_bytes(PragmaPushMacroInfo); | |
306 llvm::errs() << "\n Poison Reasons: " | |
307 << llvm::capacity_in_bytes(PoisonReasons); | |
308 llvm::errs() << "\n Comment Handlers: " | |
309 << llvm::capacity_in_bytes(CommentHandlers) << "\n"; | |
310 } | |
311 | |
312 Preprocessor::macro_iterator | |
313 Preprocessor::macro_begin(bool IncludeExternalMacros) const { | |
314 if (IncludeExternalMacros && ExternalSource && | |
315 !ReadMacrosFromExternalSource) { | |
316 ReadMacrosFromExternalSource = true; | |
317 ExternalSource->ReadDefinedMacros(); | |
318 } | |
319 | |
320 // Make sure we cover all macros in visible modules. | |
321 for (const ModuleMacro &Macro : ModuleMacros) | |
322 CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState())); | |
323 | |
324 return CurSubmoduleState->Macros.begin(); | |
325 } | |
326 | |
327 size_t Preprocessor::getTotalMemory() const { | |
328 return BP.getTotalMemory() | |
329 + llvm::capacity_in_bytes(MacroExpandedTokens) | |
330 + Predefines.capacity() /* Predefines buffer. */ | |
331 // FIXME: Include sizes from all submodules, and include MacroInfo sizes, | |
332 // and ModuleMacros. | |
333 + llvm::capacity_in_bytes(CurSubmoduleState->Macros) | |
334 + llvm::capacity_in_bytes(PragmaPushMacroInfo) | |
335 + llvm::capacity_in_bytes(PoisonReasons) | |
336 + llvm::capacity_in_bytes(CommentHandlers); | |
337 } | |
338 | |
339 Preprocessor::macro_iterator | |
340 Preprocessor::macro_end(bool IncludeExternalMacros) const { | |
341 if (IncludeExternalMacros && ExternalSource && | |
342 !ReadMacrosFromExternalSource) { | |
343 ReadMacrosFromExternalSource = true; | |
344 ExternalSource->ReadDefinedMacros(); | |
345 } | |
346 | |
347 return CurSubmoduleState->Macros.end(); | |
348 } | |
349 | |
350 /// Compares macro tokens with a specified token value sequence. | |
351 static bool MacroDefinitionEquals(const MacroInfo *MI, | |
352 ArrayRef<TokenValue> Tokens) { | |
353 return Tokens.size() == MI->getNumTokens() && | |
354 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin()); | |
355 } | |
356 | |
357 StringRef Preprocessor::getLastMacroWithSpelling( | |
358 SourceLocation Loc, | |
359 ArrayRef<TokenValue> Tokens) const { | |
360 SourceLocation BestLocation; | |
361 StringRef BestSpelling; | |
362 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end(); | |
363 I != E; ++I) { | |
364 const MacroDirective::DefInfo | |
365 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr); | |
366 if (!Def || !Def.getMacroInfo()) | |
367 continue; | |
368 if (!Def.getMacroInfo()->isObjectLike()) | |
369 continue; | |
370 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens)) | |
371 continue; | |
372 SourceLocation Location = Def.getLocation(); | |
373 // Choose the macro defined latest. | |
374 if (BestLocation.isInvalid() || | |
375 (Location.isValid() && | |
376 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) { | |
377 BestLocation = Location; | |
378 BestSpelling = I->first->getName(); | |
379 } | |
380 } | |
381 return BestSpelling; | |
382 } | |
383 | |
384 void Preprocessor::recomputeCurLexerKind() { | |
385 if (CurLexer) | |
386 CurLexerKind = CLK_Lexer; | |
387 else if (CurTokenLexer) | |
388 CurLexerKind = CLK_TokenLexer; | |
389 else | |
390 CurLexerKind = CLK_CachingLexer; | |
391 } | |
392 | |
393 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, | |
394 unsigned CompleteLine, | |
395 unsigned CompleteColumn) { | |
396 assert(File); | |
397 assert(CompleteLine && CompleteColumn && "Starts from 1:1"); | |
398 assert(!CodeCompletionFile && "Already set"); | |
399 | |
400 // Load the actual file's contents. | |
207 | 401 Optional<llvm::MemoryBufferRef> Buffer = |
402 SourceMgr.getMemoryBufferForFileOrNone(File); | |
403 if (!Buffer) | |
150 | 404 return true; |
405 | |
406 // Find the byte position of the truncation point. | |
407 const char *Position = Buffer->getBufferStart(); | |
408 for (unsigned Line = 1; Line < CompleteLine; ++Line) { | |
409 for (; *Position; ++Position) { | |
410 if (*Position != '\r' && *Position != '\n') | |
411 continue; | |
412 | |
413 // Eat \r\n or \n\r as a single line. | |
414 if ((Position[1] == '\r' || Position[1] == '\n') && | |
415 Position[0] != Position[1]) | |
416 ++Position; | |
417 ++Position; | |
418 break; | |
419 } | |
420 } | |
421 | |
422 Position += CompleteColumn - 1; | |
423 | |
424 // If pointing inside the preamble, adjust the position at the beginning of | |
425 // the file after the preamble. | |
426 if (SkipMainFilePreamble.first && | |
427 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) { | |
428 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first) | |
429 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first; | |
430 } | |
431 | |
432 if (Position > Buffer->getBufferEnd()) | |
433 Position = Buffer->getBufferEnd(); | |
434 | |
435 CodeCompletionFile = File; | |
436 CodeCompletionOffset = Position - Buffer->getBufferStart(); | |
437 | |
438 auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer( | |
439 Buffer->getBufferSize() + 1, Buffer->getBufferIdentifier()); | |
440 char *NewBuf = NewBuffer->getBufferStart(); | |
441 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf); | |
442 *NewPos = '\0'; | |
443 std::copy(Position, Buffer->getBufferEnd(), NewPos+1); | |
444 SourceMgr.overrideFileContents(File, std::move(NewBuffer)); | |
445 | |
446 return false; | |
447 } | |
448 | |
449 void Preprocessor::CodeCompleteIncludedFile(llvm::StringRef Dir, | |
450 bool IsAngled) { | |
207 | 451 setCodeCompletionReached(); |
150 | 452 if (CodeComplete) |
453 CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled); | |
454 } | |
455 | |
456 void Preprocessor::CodeCompleteNaturalLanguage() { | |
207 | 457 setCodeCompletionReached(); |
150 | 458 if (CodeComplete) |
459 CodeComplete->CodeCompleteNaturalLanguage(); | |
460 } | |
461 | |
462 /// getSpelling - This method is used to get the spelling of a token into a | |
463 /// SmallVector. Note that the returned StringRef may not point to the | |
464 /// supplied buffer if a copy can be avoided. | |
465 StringRef Preprocessor::getSpelling(const Token &Tok, | |
466 SmallVectorImpl<char> &Buffer, | |
467 bool *Invalid) const { | |
468 // NOTE: this has to be checked *before* testing for an IdentifierInfo. | |
469 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) { | |
470 // Try the fast path. | |
471 if (const IdentifierInfo *II = Tok.getIdentifierInfo()) | |
472 return II->getName(); | |
473 } | |
474 | |
475 // Resize the buffer if we need to copy into it. | |
476 if (Tok.needsCleaning()) | |
477 Buffer.resize(Tok.getLength()); | |
478 | |
479 const char *Ptr = Buffer.data(); | |
480 unsigned Len = getSpelling(Tok, Ptr, Invalid); | |
481 return StringRef(Ptr, Len); | |
482 } | |
483 | |
484 /// CreateString - Plop the specified string into a scratch buffer and return a | |
485 /// location for it. If specified, the source location provides a source | |
486 /// location for the token. | |
487 void Preprocessor::CreateString(StringRef Str, Token &Tok, | |
488 SourceLocation ExpansionLocStart, | |
489 SourceLocation ExpansionLocEnd) { | |
490 Tok.setLength(Str.size()); | |
491 | |
492 const char *DestPtr; | |
493 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr); | |
494 | |
495 if (ExpansionLocStart.isValid()) | |
496 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart, | |
497 ExpansionLocEnd, Str.size()); | |
498 Tok.setLocation(Loc); | |
499 | |
500 // If this is a raw identifier or a literal token, set the pointer data. | |
501 if (Tok.is(tok::raw_identifier)) | |
502 Tok.setRawIdentifierData(DestPtr); | |
503 else if (Tok.isLiteral()) | |
504 Tok.setLiteralData(DestPtr); | |
505 } | |
506 | |
507 SourceLocation Preprocessor::SplitToken(SourceLocation Loc, unsigned Length) { | |
508 auto &SM = getSourceManager(); | |
509 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc); | |
510 std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(SpellingLoc); | |
511 bool Invalid = false; | |
512 StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); | |
513 if (Invalid) | |
514 return SourceLocation(); | |
515 | |
516 // FIXME: We could consider re-using spelling for tokens we see repeatedly. | |
517 const char *DestPtr; | |
518 SourceLocation Spelling = | |
519 ScratchBuf->getToken(Buffer.data() + LocInfo.second, Length, DestPtr); | |
520 return SM.createTokenSplitLoc(Spelling, Loc, Loc.getLocWithOffset(Length)); | |
521 } | |
522 | |
523 Module *Preprocessor::getCurrentModule() { | |
524 if (!getLangOpts().isCompilingModule()) | |
525 return nullptr; | |
526 | |
527 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule); | |
528 } | |
529 | |
530 //===----------------------------------------------------------------------===// | |
531 // Preprocessor Initialization Methods | |
532 //===----------------------------------------------------------------------===// | |
533 | |
534 /// EnterMainSourceFile - Enter the specified FileID as the main source file, | |
535 /// which implicitly adds the builtin defines etc. | |
536 void Preprocessor::EnterMainSourceFile() { | |
537 // We do not allow the preprocessor to reenter the main file. Doing so will | |
538 // cause FileID's to accumulate information from both runs (e.g. #line | |
539 // information) and predefined macros aren't guaranteed to be set properly. | |
540 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); | |
541 FileID MainFileID = SourceMgr.getMainFileID(); | |
542 | |
543 // If MainFileID is loaded it means we loaded an AST file, no need to enter | |
544 // a main file. | |
545 if (!SourceMgr.isLoadedFileID(MainFileID)) { | |
546 // Enter the main file source buffer. | |
547 EnterSourceFile(MainFileID, nullptr, SourceLocation()); | |
548 | |
549 // If we've been asked to skip bytes in the main file (e.g., as part of a | |
550 // precompiled preamble), do so now. | |
551 if (SkipMainFilePreamble.first > 0) | |
552 CurLexer->SetByteOffset(SkipMainFilePreamble.first, | |
553 SkipMainFilePreamble.second); | |
554 | |
555 // Tell the header info that the main file was entered. If the file is later | |
556 // #imported, it won't be re-entered. | |
557 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) | |
558 HeaderInfo.IncrementIncludeCount(FE); | |
559 } | |
560 | |
561 // Preprocess Predefines to populate the initial preprocessor state. | |
562 std::unique_ptr<llvm::MemoryBuffer> SB = | |
563 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>"); | |
564 assert(SB && "Cannot create predefined source buffer"); | |
565 FileID FID = SourceMgr.createFileID(std::move(SB)); | |
566 assert(FID.isValid() && "Could not create FileID for predefines?"); | |
567 setPredefinesFileID(FID); | |
568 | |
569 // Start parsing the predefines. | |
570 EnterSourceFile(FID, nullptr, SourceLocation()); | |
571 | |
572 if (!PPOpts->PCHThroughHeader.empty()) { | |
573 // Lookup and save the FileID for the through header. If it isn't found | |
574 // in the search path, it's a fatal error. | |
575 const DirectoryLookup *CurDir; | |
576 Optional<FileEntryRef> File = LookupFile( | |
577 SourceLocation(), PPOpts->PCHThroughHeader, | |
578 /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr, CurDir, | |
579 /*SearchPath=*/nullptr, /*RelativePath=*/nullptr, | |
580 /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr, | |
581 /*IsFrameworkFound=*/nullptr); | |
582 if (!File) { | |
583 Diag(SourceLocation(), diag::err_pp_through_header_not_found) | |
584 << PPOpts->PCHThroughHeader; | |
585 return; | |
586 } | |
587 setPCHThroughHeaderFileID( | |
588 SourceMgr.createFileID(*File, SourceLocation(), SrcMgr::C_User)); | |
589 } | |
590 | |
591 // Skip tokens from the Predefines and if needed the main file. | |
592 if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) || | |
593 (usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop)) | |
594 SkipTokensWhileUsingPCH(); | |
595 } | |
596 | |
597 void Preprocessor::setPCHThroughHeaderFileID(FileID FID) { | |
598 assert(PCHThroughHeaderFileID.isInvalid() && | |
599 "PCHThroughHeaderFileID already set!"); | |
600 PCHThroughHeaderFileID = FID; | |
601 } | |
602 | |
603 bool Preprocessor::isPCHThroughHeader(const FileEntry *FE) { | |
604 assert(PCHThroughHeaderFileID.isValid() && | |
605 "Invalid PCH through header FileID"); | |
606 return FE == SourceMgr.getFileEntryForID(PCHThroughHeaderFileID); | |
607 } | |
608 | |
609 bool Preprocessor::creatingPCHWithThroughHeader() { | |
610 return TUKind == TU_Prefix && !PPOpts->PCHThroughHeader.empty() && | |
611 PCHThroughHeaderFileID.isValid(); | |
612 } | |
613 | |
614 bool Preprocessor::usingPCHWithThroughHeader() { | |
615 return TUKind != TU_Prefix && !PPOpts->PCHThroughHeader.empty() && | |
616 PCHThroughHeaderFileID.isValid(); | |
617 } | |
618 | |
619 bool Preprocessor::creatingPCHWithPragmaHdrStop() { | |
620 return TUKind == TU_Prefix && PPOpts->PCHWithHdrStop; | |
621 } | |
622 | |
623 bool Preprocessor::usingPCHWithPragmaHdrStop() { | |
624 return TUKind != TU_Prefix && PPOpts->PCHWithHdrStop; | |
625 } | |
626 | |
627 /// Skip tokens until after the #include of the through header or | |
628 /// until after a #pragma hdrstop is seen. Tokens in the predefines file | |
629 /// and the main file may be skipped. If the end of the predefines file | |
630 /// is reached, skipping continues into the main file. If the end of the | |
631 /// main file is reached, it's a fatal error. | |
632 void Preprocessor::SkipTokensWhileUsingPCH() { | |
633 bool ReachedMainFileEOF = false; | |
634 bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader; | |
635 bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop; | |
636 Token Tok; | |
637 while (true) { | |
638 bool InPredefines = | |
639 (CurLexer && CurLexer->getFileID() == getPredefinesFileID()); | |
640 switch (CurLexerKind) { | |
641 case CLK_Lexer: | |
642 CurLexer->Lex(Tok); | |
643 break; | |
644 case CLK_TokenLexer: | |
645 CurTokenLexer->Lex(Tok); | |
646 break; | |
647 case CLK_CachingLexer: | |
648 CachingLex(Tok); | |
649 break; | |
650 case CLK_LexAfterModuleImport: | |
651 LexAfterModuleImport(Tok); | |
652 break; | |
653 } | |
654 if (Tok.is(tok::eof) && !InPredefines) { | |
655 ReachedMainFileEOF = true; | |
656 break; | |
657 } | |
658 if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader) | |
659 break; | |
660 if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop) | |
661 break; | |
662 } | |
663 if (ReachedMainFileEOF) { | |
664 if (UsingPCHThroughHeader) | |
665 Diag(SourceLocation(), diag::err_pp_through_header_not_seen) | |
666 << PPOpts->PCHThroughHeader << 1; | |
667 else if (!PPOpts->PCHWithHdrStopCreate) | |
668 Diag(SourceLocation(), diag::err_pp_pragma_hdrstop_not_seen); | |
669 } | |
670 } | |
671 | |
672 void Preprocessor::replayPreambleConditionalStack() { | |
673 // Restore the conditional stack from the preamble, if there is one. | |
674 if (PreambleConditionalStack.isReplaying()) { | |
675 assert(CurPPLexer && | |
676 "CurPPLexer is null when calling replayPreambleConditionalStack."); | |
677 CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack()); | |
678 PreambleConditionalStack.doneReplaying(); | |
679 if (PreambleConditionalStack.reachedEOFWhileSkipping()) | |
680 SkipExcludedConditionalBlock( | |
681 PreambleConditionalStack.SkipInfo->HashTokenLoc, | |
682 PreambleConditionalStack.SkipInfo->IfTokenLoc, | |
683 PreambleConditionalStack.SkipInfo->FoundNonSkipPortion, | |
684 PreambleConditionalStack.SkipInfo->FoundElse, | |
685 PreambleConditionalStack.SkipInfo->ElseLoc); | |
686 } | |
687 } | |
688 | |
689 void Preprocessor::EndSourceFile() { | |
690 // Notify the client that we reached the end of the source file. | |
691 if (Callbacks) | |
692 Callbacks->EndOfMainFile(); | |
693 } | |
694 | |
695 //===----------------------------------------------------------------------===// | |
696 // Lexer Event Handling. | |
697 //===----------------------------------------------------------------------===// | |
698 | |
699 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the | |
700 /// identifier information for the token and install it into the token, | |
701 /// updating the token kind accordingly. | |
702 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { | |
703 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!"); | |
704 | |
705 // Look up this token, see if it is a macro, or if it is a language keyword. | |
706 IdentifierInfo *II; | |
707 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) { | |
708 // No cleaning needed, just use the characters from the lexed buffer. | |
709 II = getIdentifierInfo(Identifier.getRawIdentifier()); | |
710 } else { | |
711 // Cleaning needed, alloca a buffer, clean into it, then use the buffer. | |
712 SmallString<64> IdentifierBuffer; | |
713 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); | |
714 | |
715 if (Identifier.hasUCN()) { | |
716 SmallString<64> UCNIdentifierBuffer; | |
717 expandUCNs(UCNIdentifierBuffer, CleanedStr); | |
718 II = getIdentifierInfo(UCNIdentifierBuffer); | |
719 } else { | |
720 II = getIdentifierInfo(CleanedStr); | |
721 } | |
722 } | |
723 | |
724 // Update the token info (identifier info and appropriate token kind). | |
725 Identifier.setIdentifierInfo(II); | |
726 if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() && | |
727 getSourceManager().isInSystemHeader(Identifier.getLocation())) | |
728 Identifier.setKind(tok::identifier); | |
729 else | |
730 Identifier.setKind(II->getTokenID()); | |
731 | |
732 return II; | |
733 } | |
734 | |
735 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) { | |
736 PoisonReasons[II] = DiagID; | |
737 } | |
738 | |
739 void Preprocessor::PoisonSEHIdentifiers(bool Poison) { | |
740 assert(Ident__exception_code && Ident__exception_info); | |
741 assert(Ident___exception_code && Ident___exception_info); | |
742 Ident__exception_code->setIsPoisoned(Poison); | |
743 Ident___exception_code->setIsPoisoned(Poison); | |
744 Ident_GetExceptionCode->setIsPoisoned(Poison); | |
745 Ident__exception_info->setIsPoisoned(Poison); | |
746 Ident___exception_info->setIsPoisoned(Poison); | |
747 Ident_GetExceptionInfo->setIsPoisoned(Poison); | |
748 Ident__abnormal_termination->setIsPoisoned(Poison); | |
749 Ident___abnormal_termination->setIsPoisoned(Poison); | |
750 Ident_AbnormalTermination->setIsPoisoned(Poison); | |
751 } | |
752 | |
753 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { | |
754 assert(Identifier.getIdentifierInfo() && | |
755 "Can't handle identifiers without identifier info!"); | |
756 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it = | |
757 PoisonReasons.find(Identifier.getIdentifierInfo()); | |
758 if(it == PoisonReasons.end()) | |
759 Diag(Identifier, diag::err_pp_used_poisoned_id); | |
760 else | |
761 Diag(Identifier,it->second) << Identifier.getIdentifierInfo(); | |
762 } | |
763 | |
764 /// Returns a diagnostic message kind for reporting a future keyword as | |
765 /// appropriate for the identifier and specified language. | |
766 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, | |
767 const LangOptions &LangOpts) { | |
768 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed"); | |
769 | |
770 if (LangOpts.CPlusPlus) | |
771 return llvm::StringSwitch<diag::kind>(II.getName()) | |
772 #define CXX11_KEYWORD(NAME, FLAGS) \ | |
773 .Case(#NAME, diag::warn_cxx11_keyword) | |
173 | 774 #define CXX20_KEYWORD(NAME, FLAGS) \ |
775 .Case(#NAME, diag::warn_cxx20_keyword) | |
150 | 776 #include "clang/Basic/TokenKinds.def" |
173 | 777 // char8_t is not modeled as a CXX20_KEYWORD because it's not |
778 // unconditionally enabled in C++20 mode. (It can be disabled | |
779 // by -fno-char8_t.) | |
780 .Case("char8_t", diag::warn_cxx20_keyword) | |
150 | 781 ; |
782 | |
783 llvm_unreachable( | |
784 "Keyword not known to come from a newer Standard or proposed Standard"); | |
785 } | |
786 | |
787 void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const { | |
788 assert(II.isOutOfDate() && "not out of date"); | |
789 getExternalSource()->updateOutOfDateIdentifier(II); | |
790 } | |
791 | |
792 /// HandleIdentifier - This callback is invoked when the lexer reads an | |
793 /// identifier. This callback looks up the identifier in the map and/or | |
794 /// potentially macro expands it or turns it into a named token (like 'for'). | |
795 /// | |
796 /// Note that callers of this method are guarded by checking the | |
797 /// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the | |
798 /// IdentifierInfo methods that compute these properties will need to change to | |
799 /// match. | |
800 bool Preprocessor::HandleIdentifier(Token &Identifier) { | |
801 assert(Identifier.getIdentifierInfo() && | |
802 "Can't handle identifiers without identifier info!"); | |
803 | |
804 IdentifierInfo &II = *Identifier.getIdentifierInfo(); | |
805 | |
806 // If the information about this identifier is out of date, update it from | |
807 // the external source. | |
808 // We have to treat __VA_ARGS__ in a special way, since it gets | |
809 // serialized with isPoisoned = true, but our preprocessor may have | |
810 // unpoisoned it if we're defining a C99 macro. | |
811 if (II.isOutOfDate()) { | |
812 bool CurrentIsPoisoned = false; | |
813 const bool IsSpecialVariadicMacro = | |
814 &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__; | |
815 if (IsSpecialVariadicMacro) | |
816 CurrentIsPoisoned = II.isPoisoned(); | |
817 | |
818 updateOutOfDateIdentifier(II); | |
819 Identifier.setKind(II.getTokenID()); | |
820 | |
821 if (IsSpecialVariadicMacro) | |
822 II.setIsPoisoned(CurrentIsPoisoned); | |
823 } | |
824 | |
825 // If this identifier was poisoned, and if it was not produced from a macro | |
826 // expansion, emit an error. | |
827 if (II.isPoisoned() && CurPPLexer) { | |
828 HandlePoisonedIdentifier(Identifier); | |
829 } | |
830 | |
831 // If this is a macro to be expanded, do it. | |
832 if (MacroDefinition MD = getMacroDefinition(&II)) { | |
833 auto *MI = MD.getMacroInfo(); | |
834 assert(MI && "macro definition with no macro info?"); | |
835 if (!DisableMacroExpansion) { | |
836 if (!Identifier.isExpandDisabled() && MI->isEnabled()) { | |
837 // C99 6.10.3p10: If the preprocessing token immediately after the | |
838 // macro name isn't a '(', this macro should not be expanded. | |
839 if (!MI->isFunctionLike() || isNextPPTokenLParen()) | |
840 return HandleMacroExpandedIdentifier(Identifier, MD); | |
841 } else { | |
842 // C99 6.10.3.4p2 says that a disabled macro may never again be | |
843 // expanded, even if it's in a context where it could be expanded in the | |
844 // future. | |
845 Identifier.setFlag(Token::DisableExpand); | |
846 if (MI->isObjectLike() || isNextPPTokenLParen()) | |
847 Diag(Identifier, diag::pp_disabled_macro_expansion); | |
848 } | |
849 } | |
850 } | |
851 | |
852 // If this identifier is a keyword in a newer Standard or proposed Standard, | |
853 // produce a warning. Don't warn if we're not considering macro expansion, | |
854 // since this identifier might be the name of a macro. | |
855 // FIXME: This warning is disabled in cases where it shouldn't be, like | |
856 // "#define constexpr constexpr", "int constexpr;" | |
857 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) { | |
858 Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts())) | |
859 << II.getName(); | |
860 // Don't diagnose this keyword again in this translation unit. | |
861 II.setIsFutureCompatKeyword(false); | |
862 } | |
863 | |
864 // If this is an extension token, diagnose its use. | |
865 // We avoid diagnosing tokens that originate from macro definitions. | |
866 // FIXME: This warning is disabled in cases where it shouldn't be, | |
867 // like "#define TY typeof", "TY(1) x". | |
868 if (II.isExtensionToken() && !DisableMacroExpansion) | |
869 Diag(Identifier, diag::ext_token_used); | |
870 | |
871 // If this is the 'import' contextual keyword following an '@', note | |
872 // that the next token indicates a module name. | |
873 // | |
874 // Note that we do not treat 'import' as a contextual | |
875 // keyword when we're in a caching lexer, because caching lexers only get | |
876 // used in contexts where import declarations are disallowed. | |
877 // | |
878 // Likewise if this is the C++ Modules TS import keyword. | |
879 if (((LastTokenWasAt && II.isModulesImport()) || | |
880 Identifier.is(tok::kw_import)) && | |
881 !InMacroArgs && !DisableMacroExpansion && | |
882 (getLangOpts().Modules || getLangOpts().DebuggerSupport) && | |
883 CurLexerKind != CLK_CachingLexer) { | |
884 ModuleImportLoc = Identifier.getLocation(); | |
885 ModuleImportPath.clear(); | |
886 ModuleImportExpectsIdentifier = true; | |
887 CurLexerKind = CLK_LexAfterModuleImport; | |
888 } | |
889 return true; | |
890 } | |
891 | |
892 void Preprocessor::Lex(Token &Result) { | |
893 ++LexLevel; | |
894 | |
895 // We loop here until a lex function returns a token; this avoids recursion. | |
896 bool ReturnedToken; | |
897 do { | |
898 switch (CurLexerKind) { | |
899 case CLK_Lexer: | |
152 | 900 #ifndef noCbC |
901 ReturnedToken = CurLexer->Lex(Result, ProtoParsing); | |
902 #else | |
150 | 903 ReturnedToken = CurLexer->Lex(Result); |
152 | 904 #endif |
150 | 905 break; |
906 case CLK_TokenLexer: | |
907 ReturnedToken = CurTokenLexer->Lex(Result); | |
908 break; | |
909 case CLK_CachingLexer: | |
910 CachingLex(Result); | |
911 ReturnedToken = true; | |
912 break; | |
913 case CLK_LexAfterModuleImport: | |
914 ReturnedToken = LexAfterModuleImport(Result); | |
915 break; | |
916 } | |
917 } while (!ReturnedToken); | |
918 | |
919 if (Result.is(tok::unknown) && TheModuleLoader.HadFatalFailure) | |
920 return; | |
921 | |
922 if (Result.is(tok::code_completion) && Result.getIdentifierInfo()) { | |
923 // Remember the identifier before code completion token. | |
924 setCodeCompletionIdentifierInfo(Result.getIdentifierInfo()); | |
925 setCodeCompletionTokenRange(Result.getLocation(), Result.getEndLoc()); | |
926 // Set IdenfitierInfo to null to avoid confusing code that handles both | |
927 // identifiers and completion tokens. | |
928 Result.setIdentifierInfo(nullptr); | |
929 } | |
930 | |
931 // Update ImportSeqState to track our position within a C++20 import-seq | |
932 // if this token is being produced as a result of phase 4 of translation. | |
933 if (getLangOpts().CPlusPlusModules && LexLevel == 1 && | |
934 !Result.getFlag(Token::IsReinjected)) { | |
935 switch (Result.getKind()) { | |
936 case tok::l_paren: case tok::l_square: case tok::l_brace: | |
937 ImportSeqState.handleOpenBracket(); | |
938 break; | |
939 case tok::r_paren: case tok::r_square: | |
940 ImportSeqState.handleCloseBracket(); | |
941 break; | |
942 case tok::r_brace: | |
943 ImportSeqState.handleCloseBrace(); | |
944 break; | |
945 case tok::semi: | |
946 ImportSeqState.handleSemi(); | |
947 break; | |
948 case tok::header_name: | |
949 case tok::annot_header_unit: | |
950 ImportSeqState.handleHeaderName(); | |
951 break; | |
952 case tok::kw_export: | |
953 ImportSeqState.handleExport(); | |
954 break; | |
955 case tok::identifier: | |
956 if (Result.getIdentifierInfo()->isModulesImport()) { | |
957 ImportSeqState.handleImport(); | |
958 if (ImportSeqState.afterImportSeq()) { | |
959 ModuleImportLoc = Result.getLocation(); | |
960 ModuleImportPath.clear(); | |
961 ModuleImportExpectsIdentifier = true; | |
962 CurLexerKind = CLK_LexAfterModuleImport; | |
963 } | |
964 break; | |
965 } | |
966 LLVM_FALLTHROUGH; | |
967 default: | |
968 ImportSeqState.handleMisc(); | |
969 break; | |
970 } | |
971 } | |
972 | |
973 LastTokenWasAt = Result.is(tok::at); | |
974 --LexLevel; | |
975 | |
207 | 976 if ((LexLevel == 0 || PreprocessToken) && |
977 !Result.getFlag(Token::IsReinjected)) { | |
978 if (LexLevel == 0) | |
979 ++TokenCount; | |
150 | 980 if (OnToken) |
981 OnToken(Result); | |
982 } | |
983 } | |
984 | |
985 /// Lex a header-name token (including one formed from header-name-tokens if | |
986 /// \p AllowConcatenation is \c true). | |
987 /// | |
988 /// \param FilenameTok Filled in with the next token. On success, this will | |
989 /// be either a header_name token. On failure, it will be whatever other | |
990 /// token was found instead. | |
991 /// \param AllowMacroExpansion If \c true, allow the header name to be formed | |
992 /// by macro expansion (concatenating tokens as necessary if the first | |
993 /// token is a '<'). | |
994 /// \return \c true if we reached EOD or EOF while looking for a > token in | |
995 /// a concatenated header name and diagnosed it. \c false otherwise. | |
996 bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) { | |
997 // Lex using header-name tokenization rules if tokens are being lexed from | |
998 // a file. Just grab a token normally if we're in a macro expansion. | |
999 if (CurPPLexer) | |
1000 CurPPLexer->LexIncludeFilename(FilenameTok); | |
1001 else | |
1002 Lex(FilenameTok); | |
1003 | |
1004 // This could be a <foo/bar.h> file coming from a macro expansion. In this | |
1005 // case, glue the tokens together into an angle_string_literal token. | |
1006 SmallString<128> FilenameBuffer; | |
1007 if (FilenameTok.is(tok::less) && AllowMacroExpansion) { | |
1008 bool StartOfLine = FilenameTok.isAtStartOfLine(); | |
1009 bool LeadingSpace = FilenameTok.hasLeadingSpace(); | |
1010 bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro(); | |
1011 | |
1012 SourceLocation Start = FilenameTok.getLocation(); | |
1013 SourceLocation End; | |
1014 FilenameBuffer.push_back('<'); | |
1015 | |
1016 // Consume tokens until we find a '>'. | |
1017 // FIXME: A header-name could be formed starting or ending with an | |
1018 // alternative token. It's not clear whether that's ill-formed in all | |
1019 // cases. | |
1020 while (FilenameTok.isNot(tok::greater)) { | |
1021 Lex(FilenameTok); | |
1022 if (FilenameTok.isOneOf(tok::eod, tok::eof)) { | |
1023 Diag(FilenameTok.getLocation(), diag::err_expected) << tok::greater; | |
1024 Diag(Start, diag::note_matching) << tok::less; | |
1025 return true; | |
1026 } | |
1027 | |
1028 End = FilenameTok.getLocation(); | |
1029 | |
1030 // FIXME: Provide code completion for #includes. | |
1031 if (FilenameTok.is(tok::code_completion)) { | |
1032 setCodeCompletionReached(); | |
1033 Lex(FilenameTok); | |
1034 continue; | |
1035 } | |
1036 | |
1037 // Append the spelling of this token to the buffer. If there was a space | |
1038 // before it, add it now. | |
1039 if (FilenameTok.hasLeadingSpace()) | |
1040 FilenameBuffer.push_back(' '); | |
1041 | |
1042 // Get the spelling of the token, directly into FilenameBuffer if | |
1043 // possible. | |
1044 size_t PreAppendSize = FilenameBuffer.size(); | |
1045 FilenameBuffer.resize(PreAppendSize + FilenameTok.getLength()); | |
1046 | |
1047 const char *BufPtr = &FilenameBuffer[PreAppendSize]; | |
1048 unsigned ActualLen = getSpelling(FilenameTok, BufPtr); | |
1049 | |
1050 // If the token was spelled somewhere else, copy it into FilenameBuffer. | |
1051 if (BufPtr != &FilenameBuffer[PreAppendSize]) | |
1052 memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen); | |
1053 | |
1054 // Resize FilenameBuffer to the correct size. | |
1055 if (FilenameTok.getLength() != ActualLen) | |
1056 FilenameBuffer.resize(PreAppendSize + ActualLen); | |
1057 } | |
1058 | |
1059 FilenameTok.startToken(); | |
1060 FilenameTok.setKind(tok::header_name); | |
1061 FilenameTok.setFlagValue(Token::StartOfLine, StartOfLine); | |
1062 FilenameTok.setFlagValue(Token::LeadingSpace, LeadingSpace); | |
1063 FilenameTok.setFlagValue(Token::LeadingEmptyMacro, LeadingEmptyMacro); | |
1064 CreateString(FilenameBuffer, FilenameTok, Start, End); | |
1065 } else if (FilenameTok.is(tok::string_literal) && AllowMacroExpansion) { | |
1066 // Convert a string-literal token of the form " h-char-sequence " | |
1067 // (produced by macro expansion) into a header-name token. | |
1068 // | |
1069 // The rules for header-names don't quite match the rules for | |
1070 // string-literals, but all the places where they differ result in | |
1071 // undefined behavior, so we can and do treat them the same. | |
1072 // | |
1073 // A string-literal with a prefix or suffix is not translated into a | |
1074 // header-name. This could theoretically be observable via the C++20 | |
1075 // context-sensitive header-name formation rules. | |
1076 StringRef Str = getSpelling(FilenameTok, FilenameBuffer); | |
1077 if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"') | |
1078 FilenameTok.setKind(tok::header_name); | |
1079 } | |
1080 | |
1081 return false; | |
1082 } | |
1083 | |
1084 /// Collect the tokens of a C++20 pp-import-suffix. | |
1085 void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) { | |
1086 // FIXME: For error recovery, consider recognizing attribute syntax here | |
1087 // and terminating / diagnosing a missing semicolon if we find anything | |
1088 // else? (Can we leave that to the parser?) | |
1089 unsigned BracketDepth = 0; | |
1090 while (true) { | |
1091 Toks.emplace_back(); | |
1092 Lex(Toks.back()); | |
1093 | |
1094 switch (Toks.back().getKind()) { | |
1095 case tok::l_paren: case tok::l_square: case tok::l_brace: | |
1096 ++BracketDepth; | |
1097 break; | |
1098 | |
1099 case tok::r_paren: case tok::r_square: case tok::r_brace: | |
1100 if (BracketDepth == 0) | |
1101 return; | |
1102 --BracketDepth; | |
1103 break; | |
1104 | |
1105 case tok::semi: | |
1106 if (BracketDepth == 0) | |
1107 return; | |
1108 break; | |
1109 | |
1110 case tok::eof: | |
1111 return; | |
1112 | |
1113 default: | |
1114 break; | |
1115 } | |
1116 } | |
1117 } | |
1118 | |
1119 | |
1120 /// Lex a token following the 'import' contextual keyword. | |
1121 /// | |
1122 /// pp-import: [C++20] | |
1123 /// import header-name pp-import-suffix[opt] ; | |
1124 /// import header-name-tokens pp-import-suffix[opt] ; | |
1125 /// [ObjC] @ import module-name ; | |
1126 /// [Clang] import module-name ; | |
1127 /// | |
1128 /// header-name-tokens: | |
1129 /// string-literal | |
1130 /// < [any sequence of preprocessing-tokens other than >] > | |
1131 /// | |
1132 /// module-name: | |
1133 /// module-name-qualifier[opt] identifier | |
1134 /// | |
1135 /// module-name-qualifier | |
1136 /// module-name-qualifier[opt] identifier . | |
1137 /// | |
1138 /// We respond to a pp-import by importing macros from the named module. | |
1139 bool Preprocessor::LexAfterModuleImport(Token &Result) { | |
1140 // Figure out what kind of lexer we actually have. | |
1141 recomputeCurLexerKind(); | |
1142 | |
1143 // Lex the next token. The header-name lexing rules are used at the start of | |
1144 // a pp-import. | |
1145 // | |
1146 // For now, we only support header-name imports in C++20 mode. | |
1147 // FIXME: Should we allow this in all language modes that support an import | |
1148 // declaration as an extension? | |
1149 if (ModuleImportPath.empty() && getLangOpts().CPlusPlusModules) { | |
1150 if (LexHeaderName(Result)) | |
1151 return true; | |
1152 } else { | |
1153 Lex(Result); | |
1154 } | |
1155 | |
1156 // Allocate a holding buffer for a sequence of tokens and introduce it into | |
1157 // the token stream. | |
1158 auto EnterTokens = [this](ArrayRef<Token> Toks) { | |
1159 auto ToksCopy = std::make_unique<Token[]>(Toks.size()); | |
1160 std::copy(Toks.begin(), Toks.end(), ToksCopy.get()); | |
1161 EnterTokenStream(std::move(ToksCopy), Toks.size(), | |
1162 /*DisableMacroExpansion*/ true, /*IsReinject*/ false); | |
1163 }; | |
1164 | |
1165 // Check for a header-name. | |
1166 SmallVector<Token, 32> Suffix; | |
1167 if (Result.is(tok::header_name)) { | |
1168 // Enter the header-name token into the token stream; a Lex action cannot | |
1169 // both return a token and cache tokens (doing so would corrupt the token | |
1170 // cache if the call to Lex comes from CachingLex / PeekAhead). | |
1171 Suffix.push_back(Result); | |
1172 | |
1173 // Consume the pp-import-suffix and expand any macros in it now. We'll add | |
1174 // it back into the token stream later. | |
1175 CollectPpImportSuffix(Suffix); | |
1176 if (Suffix.back().isNot(tok::semi)) { | |
1177 // This is not a pp-import after all. | |
1178 EnterTokens(Suffix); | |
1179 return false; | |
1180 } | |
1181 | |
1182 // C++2a [cpp.module]p1: | |
1183 // The ';' preprocessing-token terminating a pp-import shall not have | |
1184 // been produced by macro replacement. | |
1185 SourceLocation SemiLoc = Suffix.back().getLocation(); | |
1186 if (SemiLoc.isMacroID()) | |
1187 Diag(SemiLoc, diag::err_header_import_semi_in_macro); | |
1188 | |
1189 // Reconstitute the import token. | |
1190 Token ImportTok; | |
1191 ImportTok.startToken(); | |
1192 ImportTok.setKind(tok::kw_import); | |
1193 ImportTok.setLocation(ModuleImportLoc); | |
1194 ImportTok.setIdentifierInfo(getIdentifierInfo("import")); | |
1195 ImportTok.setLength(6); | |
1196 | |
1197 auto Action = HandleHeaderIncludeOrImport( | |
1198 /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc); | |
1199 switch (Action.Kind) { | |
1200 case ImportAction::None: | |
1201 break; | |
1202 | |
1203 case ImportAction::ModuleBegin: | |
1204 // Let the parser know we're textually entering the module. | |
1205 Suffix.emplace_back(); | |
1206 Suffix.back().startToken(); | |
1207 Suffix.back().setKind(tok::annot_module_begin); | |
1208 Suffix.back().setLocation(SemiLoc); | |
1209 Suffix.back().setAnnotationEndLoc(SemiLoc); | |
1210 Suffix.back().setAnnotationValue(Action.ModuleForHeader); | |
1211 LLVM_FALLTHROUGH; | |
1212 | |
1213 case ImportAction::ModuleImport: | |
1214 case ImportAction::SkippedModuleImport: | |
1215 // We chose to import (or textually enter) the file. Convert the | |
1216 // header-name token into a header unit annotation token. | |
1217 Suffix[0].setKind(tok::annot_header_unit); | |
1218 Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation()); | |
1219 Suffix[0].setAnnotationValue(Action.ModuleForHeader); | |
1220 // FIXME: Call the moduleImport callback? | |
1221 break; | |
1222 case ImportAction::Failure: | |
1223 assert(TheModuleLoader.HadFatalFailure && | |
1224 "This should be an early exit only to a fatal error"); | |
1225 Result.setKind(tok::eof); | |
1226 CurLexer->cutOffLexing(); | |
1227 EnterTokens(Suffix); | |
1228 return true; | |
1229 } | |
1230 | |
1231 EnterTokens(Suffix); | |
1232 return false; | |
1233 } | |
1234 | |
1235 // The token sequence | |
1236 // | |
1237 // import identifier (. identifier)* | |
1238 // | |
1239 // indicates a module import directive. We already saw the 'import' | |
1240 // contextual keyword, so now we're looking for the identifiers. | |
1241 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { | |
1242 // We expected to see an identifier here, and we did; continue handling | |
1243 // identifiers. | |
1244 ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(), | |
1245 Result.getLocation())); | |
1246 ModuleImportExpectsIdentifier = false; | |
1247 CurLexerKind = CLK_LexAfterModuleImport; | |
1248 return true; | |
1249 } | |
1250 | |
1251 // If we're expecting a '.' or a ';', and we got a '.', then wait until we | |
1252 // see the next identifier. (We can also see a '[[' that begins an | |
1253 // attribute-specifier-seq here under the C++ Modules TS.) | |
1254 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { | |
1255 ModuleImportExpectsIdentifier = true; | |
1256 CurLexerKind = CLK_LexAfterModuleImport; | |
1257 return true; | |
1258 } | |
1259 | |
1260 // If we didn't recognize a module name at all, this is not a (valid) import. | |
1261 if (ModuleImportPath.empty() || Result.is(tok::eof)) | |
1262 return true; | |
1263 | |
1264 // Consume the pp-import-suffix and expand any macros in it now, if we're not | |
1265 // at the semicolon already. | |
1266 SourceLocation SemiLoc = Result.getLocation(); | |
1267 if (Result.isNot(tok::semi)) { | |
1268 Suffix.push_back(Result); | |
1269 CollectPpImportSuffix(Suffix); | |
1270 if (Suffix.back().isNot(tok::semi)) { | |
1271 // This is not an import after all. | |
1272 EnterTokens(Suffix); | |
1273 return false; | |
1274 } | |
1275 SemiLoc = Suffix.back().getLocation(); | |
1276 } | |
1277 | |
1278 // Under the Modules TS, the dot is just part of the module name, and not | |
1279 // a real hierarchy separator. Flatten such module names now. | |
1280 // | |
1281 // FIXME: Is this the right level to be performing this transformation? | |
1282 std::string FlatModuleName; | |
1283 if (getLangOpts().ModulesTS || getLangOpts().CPlusPlusModules) { | |
1284 for (auto &Piece : ModuleImportPath) { | |
1285 if (!FlatModuleName.empty()) | |
1286 FlatModuleName += "."; | |
1287 FlatModuleName += Piece.first->getName(); | |
1288 } | |
1289 SourceLocation FirstPathLoc = ModuleImportPath[0].second; | |
1290 ModuleImportPath.clear(); | |
1291 ModuleImportPath.push_back( | |
1292 std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc)); | |
1293 } | |
1294 | |
1295 Module *Imported = nullptr; | |
1296 if (getLangOpts().Modules) { | |
1297 Imported = TheModuleLoader.loadModule(ModuleImportLoc, | |
1298 ModuleImportPath, | |
1299 Module::Hidden, | |
1300 /*IsInclusionDirective=*/false); | |
1301 if (Imported) | |
1302 makeModuleVisible(Imported, SemiLoc); | |
1303 } | |
1304 if (Callbacks) | |
1305 Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported); | |
1306 | |
1307 if (!Suffix.empty()) { | |
1308 EnterTokens(Suffix); | |
1309 return false; | |
1310 } | |
1311 return true; | |
1312 } | |
1313 | |
1314 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) { | |
1315 CurSubmoduleState->VisibleModules.setVisible( | |
1316 M, Loc, [](Module *) {}, | |
1317 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) { | |
1318 // FIXME: Include the path in the diagnostic. | |
1319 // FIXME: Include the import location for the conflicting module. | |
1320 Diag(ModuleImportLoc, diag::warn_module_conflict) | |
1321 << Path[0]->getFullModuleName() | |
1322 << Conflict->getFullModuleName() | |
1323 << Message; | |
1324 }); | |
1325 | |
1326 // Add this module to the imports list of the currently-built submodule. | |
1327 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M) | |
1328 BuildingSubmoduleStack.back().M->Imports.insert(M); | |
1329 } | |
1330 | |
1331 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String, | |
1332 const char *DiagnosticTag, | |
1333 bool AllowMacroExpansion) { | |
1334 // We need at least one string literal. | |
1335 if (Result.isNot(tok::string_literal)) { | |
1336 Diag(Result, diag::err_expected_string_literal) | |
1337 << /*Source='in...'*/0 << DiagnosticTag; | |
1338 return false; | |
1339 } | |
1340 | |
1341 // Lex string literal tokens, optionally with macro expansion. | |
1342 SmallVector<Token, 4> StrToks; | |
1343 do { | |
1344 StrToks.push_back(Result); | |
1345 | |
1346 if (Result.hasUDSuffix()) | |
1347 Diag(Result, diag::err_invalid_string_udl); | |
1348 | |
1349 if (AllowMacroExpansion) | |
1350 Lex(Result); | |
1351 else | |
1352 LexUnexpandedToken(Result); | |
1353 } while (Result.is(tok::string_literal)); | |
1354 | |
1355 // Concatenate and parse the strings. | |
1356 StringLiteralParser Literal(StrToks, *this); | |
1357 assert(Literal.isAscii() && "Didn't allow wide strings in"); | |
1358 | |
1359 if (Literal.hadError) | |
1360 return false; | |
1361 | |
1362 if (Literal.Pascal) { | |
1363 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal) | |
1364 << /*Source='in...'*/0 << DiagnosticTag; | |
1365 return false; | |
1366 } | |
1367 | |
1368 String = std::string(Literal.GetString()); | |
1369 return true; | |
1370 } | |
1371 | |
1372 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) { | |
1373 assert(Tok.is(tok::numeric_constant)); | |
1374 SmallString<8> IntegerBuffer; | |
1375 bool NumberInvalid = false; | |
1376 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid); | |
1377 if (NumberInvalid) | |
1378 return false; | |
207 | 1379 NumericLiteralParser Literal(Spelling, Tok.getLocation(), getSourceManager(), |
1380 getLangOpts(), getTargetInfo(), | |
1381 getDiagnostics()); | |
150 | 1382 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix()) |
1383 return false; | |
1384 llvm::APInt APVal(64, 0); | |
1385 if (Literal.GetIntegerValue(APVal)) | |
1386 return false; | |
1387 Lex(Tok); | |
1388 Value = APVal.getLimitedValue(); | |
1389 return true; | |
1390 } | |
1391 | |
1392 void Preprocessor::addCommentHandler(CommentHandler *Handler) { | |
1393 assert(Handler && "NULL comment handler"); | |
1394 assert(llvm::find(CommentHandlers, Handler) == CommentHandlers.end() && | |
1395 "Comment handler already registered"); | |
1396 CommentHandlers.push_back(Handler); | |
1397 } | |
1398 | |
1399 void Preprocessor::removeCommentHandler(CommentHandler *Handler) { | |
1400 std::vector<CommentHandler *>::iterator Pos = | |
1401 llvm::find(CommentHandlers, Handler); | |
1402 assert(Pos != CommentHandlers.end() && "Comment handler not registered"); | |
1403 CommentHandlers.erase(Pos); | |
1404 } | |
1405 | |
1406 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) { | |
1407 bool AnyPendingTokens = false; | |
1408 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(), | |
1409 HEnd = CommentHandlers.end(); | |
1410 H != HEnd; ++H) { | |
1411 if ((*H)->HandleComment(*this, Comment)) | |
1412 AnyPendingTokens = true; | |
1413 } | |
1414 if (!AnyPendingTokens || getCommentRetentionState()) | |
1415 return false; | |
1416 Lex(result); | |
1417 return true; | |
1418 } | |
1419 | |
1420 ModuleLoader::~ModuleLoader() = default; | |
1421 | |
1422 CommentHandler::~CommentHandler() = default; | |
1423 | |
207 | 1424 EmptylineHandler::~EmptylineHandler() = default; |
1425 | |
150 | 1426 CodeCompletionHandler::~CodeCompletionHandler() = default; |
1427 | |
1428 void Preprocessor::createPreprocessingRecord() { | |
1429 if (Record) | |
1430 return; | |
1431 | |
1432 Record = new PreprocessingRecord(getSourceManager()); | |
1433 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record)); | |
1434 } | |
197 | 1435 |
1436 #ifndef noCbC | |
1437 | |
198
cef006dc7fd5
CurLexer vanish after EnterMacro finish
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
197
diff
changeset
|
1438 Token Preprocessor::ReadFromString(const char *src , SourceLocation Loc) { |
197 | 1439 // Push the ( "string" ) tokens into the token stream. |
198
cef006dc7fd5
CurLexer vanish after EnterMacro finish
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
197
diff
changeset
|
1440 MacroInfo *MI = AllocateMacroInfo(Loc); |
205 | 1441 Token Tok; |
198
cef006dc7fd5
CurLexer vanish after EnterMacro finish
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
197
diff
changeset
|
1442 bool Invalid = false; |
200 | 1443 std::unique_ptr<Lexer> lx(new Lexer(CurLexer->getFileID(),getSourceManager().getBuffer(CurLexer->getFileID(), Loc, &Invalid),*this)); |
1444 lx->InitLexer(src,src,src + strlen(src)); | |
1445 lx->Lex(Tok); | |
1446 CurLexer.swap(lx); | |
205 | 1447 int i = 0; |
198
cef006dc7fd5
CurLexer vanish after EnterMacro finish
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
197
diff
changeset
|
1448 while (Tok.getKind() != tok::TokenKind::eof) { |
204
e348f3e5c8b2
ReadFromString worked.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
201
diff
changeset
|
1449 Tok.setLocation(Loc); |
198
cef006dc7fd5
CurLexer vanish after EnterMacro finish
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
197
diff
changeset
|
1450 MI->AddTokenToBody(Tok); |
205 | 1451 Lex(Tok); i++; |
198
cef006dc7fd5
CurLexer vanish after EnterMacro finish
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
197
diff
changeset
|
1452 } |
205 | 1453 Tok.setLocation(Loc); |
1454 MI->AddTokenToBody(Tok); i++; | |
1455 MI->DefinitionLength = i; | |
200 | 1456 CurLexer = std::move(lx); |
201 | 1457 CurPPLexer = CurLexer.get(); |
205 | 1458 EnterMacro(Tok, Loc, MI , 0 ); |
1459 CurTokenLexer->MacroDefLength = i; | |
1460 return Tok; | |
197 | 1461 } |
1462 | |
1463 #endif |