Mercurial > hg > CbC > CbC_llvm
annotate clang/lib/Lex/Preprocessor.cpp @ 199:5784c86f13b3
...
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 03 Jun 2021 00:48:49 +0900 |
parents | cef006dc7fd5 |
children | 40b1cab18437 |
rev | line source |
---|---|
150 | 1 //===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===// |
2 // | |
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |
4 // See https://llvm.org/LICENSE.txt for license information. | |
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |
6 // | |
7 //===----------------------------------------------------------------------===// | |
8 // | |
9 // This file implements the Preprocessor interface. | |
10 // | |
11 //===----------------------------------------------------------------------===// | |
12 // | |
13 // Options to support: | |
14 // -H - Print the name of each header file used. | |
15 // -d[DNI] - Dump various things. | |
16 // -fworking-directory - #line's with preprocessor's working dir. | |
17 // -fpreprocessed | |
18 // -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD | |
19 // -W* | |
20 // -w | |
21 // | |
22 // Messages to emit: | |
23 // "Multiple include guards may be useful for:\n" | |
24 // | |
25 //===----------------------------------------------------------------------===// | |
26 | |
27 #include "clang/Lex/Preprocessor.h" | |
28 #include "clang/Basic/Builtins.h" | |
29 #include "clang/Basic/FileManager.h" | |
30 #include "clang/Basic/FileSystemStatCache.h" | |
31 #include "clang/Basic/IdentifierTable.h" | |
32 #include "clang/Basic/LLVM.h" | |
33 #include "clang/Basic/LangOptions.h" | |
34 #include "clang/Basic/Module.h" | |
35 #include "clang/Basic/SourceLocation.h" | |
36 #include "clang/Basic/SourceManager.h" | |
37 #include "clang/Basic/TargetInfo.h" | |
38 #include "clang/Lex/CodeCompletionHandler.h" | |
39 #include "clang/Lex/ExternalPreprocessorSource.h" | |
40 #include "clang/Lex/HeaderSearch.h" | |
41 #include "clang/Lex/LexDiagnostic.h" | |
42 #include "clang/Lex/Lexer.h" | |
43 #include "clang/Lex/LiteralSupport.h" | |
44 #include "clang/Lex/MacroArgs.h" | |
45 #include "clang/Lex/MacroInfo.h" | |
46 #include "clang/Lex/ModuleLoader.h" | |
47 #include "clang/Lex/Pragma.h" | |
48 #include "clang/Lex/PreprocessingRecord.h" | |
49 #include "clang/Lex/PreprocessorLexer.h" | |
50 #include "clang/Lex/PreprocessorOptions.h" | |
51 #include "clang/Lex/ScratchBuffer.h" | |
52 #include "clang/Lex/Token.h" | |
53 #include "clang/Lex/TokenLexer.h" | |
54 #include "llvm/ADT/APInt.h" | |
55 #include "llvm/ADT/ArrayRef.h" | |
56 #include "llvm/ADT/DenseMap.h" | |
57 #include "llvm/ADT/STLExtras.h" | |
58 #include "llvm/ADT/SmallString.h" | |
59 #include "llvm/ADT/SmallVector.h" | |
60 #include "llvm/ADT/StringRef.h" | |
61 #include "llvm/ADT/StringSwitch.h" | |
62 #include "llvm/Support/Capacity.h" | |
63 #include "llvm/Support/ErrorHandling.h" | |
64 #include "llvm/Support/MemoryBuffer.h" | |
65 #include "llvm/Support/raw_ostream.h" | |
66 #include <algorithm> | |
67 #include <cassert> | |
68 #include <memory> | |
69 #include <string> | |
70 #include <utility> | |
71 #include <vector> | |
72 | |
73 using namespace clang; | |
74 | |
// Instantiate the registry named PragmaHandlerRegistry in this translation
// unit (NOTE(review): exact semantics come from the macro's definition, which
// is not visible here).
75 LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry) | |
76 | |
// Out-of-line defaulted destructor for ExternalPreprocessorSource.
77 ExternalPreprocessorSource::~ExternalPreprocessorSource() = default; | |
78 | |
79 Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts, | |
80 DiagnosticsEngine &diags, LangOptions &opts, | |
81 SourceManager &SM, HeaderSearch &Headers, | |
82 ModuleLoader &TheModuleLoader, | |
83 IdentifierInfoLookup *IILookup, bool OwnsHeaders, | |
84 TranslationUnitKind TUKind) | |
85 : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), | |
86 FileMgr(Headers.getFileMgr()), SourceMgr(SM), | |
87 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers), | |
88 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr), | |
89 // As the language options may have not been loaded yet (when | |
90 // deserializing an ASTUnit), adding keywords to the identifier table is | |
91 // deferred to Preprocessor::Initialize(). | |
92 Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())), | |
93 TUKind(TUKind), SkipMainFilePreamble(0, true), | |
94 CurSubmoduleState(&NullSubmoduleState) { | |
95 OwnsHeaderSearch = OwnsHeaders; | |
96 | |
97 // Default to discarding comments. | |
98 KeepComments = false; | |
99 KeepMacroComments = false; | |
100 SuppressIncludeNotFoundError = false; | |
101 | |
102 // Macro expansion is enabled. | |
103 DisableMacroExpansion = false; | |
104 MacroExpansionInDirectivesOverride = false; | |
105 InMacroArgs = false; | |
106 ArgMacro = nullptr; | |
107 InMacroArgPreExpansion = false; | |
108 NumCachedTokenLexers = 0; | |
109 PragmasEnabled = true; | |
110 ParsingIfOrElifDirective = false; | |
111 PreprocessedOutput = false; | |
112 | |
113 // We haven't read anything from the external source. | |
114 ReadMacrosFromExternalSource = false; | |
115 | |
116 BuiltinInfo = std::make_unique<Builtin::Context>(); | |
117 | |
118 // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of | |
119 // a macro. They get unpoisoned where it is allowed. | |
120 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); | |
121 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); | |
173 | 122 if (getLangOpts().CPlusPlus20) { |
150 | 123 (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned(); |
124 SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use); | |
125 } else { | |
126 Ident__VA_OPT__ = nullptr; | |
127 } | |
128 | |
129 // Initialize the pragma handlers. | |
130 RegisterBuiltinPragmas(); | |
131 | |
132 // Initialize builtin macros like __LINE__ and friends. | |
133 RegisterBuiltinMacros(); | |
134 | |
135 if(LangOpts.Borland) { | |
136 Ident__exception_info = getIdentifierInfo("_exception_info"); | |
137 Ident___exception_info = getIdentifierInfo("__exception_info"); | |
138 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation"); | |
139 Ident__exception_code = getIdentifierInfo("_exception_code"); | |
140 Ident___exception_code = getIdentifierInfo("__exception_code"); | |
141 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode"); | |
142 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination"); | |
143 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination"); | |
144 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination"); | |
145 } else { | |
146 Ident__exception_info = Ident__exception_code = nullptr; | |
147 Ident__abnormal_termination = Ident___exception_info = nullptr; | |
148 Ident___exception_code = Ident___abnormal_termination = nullptr; | |
149 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr; | |
150 Ident_AbnormalTermination = nullptr; | |
151 } | |
152 | |
152 | 153 |
150 | 154 // If using a PCH where a #pragma hdrstop is expected, start skipping tokens. |
155 if (usingPCHWithPragmaHdrStop()) | |
156 SkippingUntilPragmaHdrStop = true; | |
157 | |
158 // If using a PCH with a through header, start skipping tokens. | |
159 if (!this->PPOpts->PCHThroughHeader.empty() && | |
160 !this->PPOpts->ImplicitPCHInclude.empty()) | |
161 SkippingUntilPCHThroughHeader = true; | |
162 | |
152 | 163 #ifndef noCbC |
164 SavedDepth = 0; | |
165 SavedTokenFlag = false; | |
166 #endif | |
167 | |
150 | 168 if (this->PPOpts->GeneratePreamble) |
169 PreambleConditionalStack.startRecording(); | |
170 | |
171 ExcludedConditionalDirectiveSkipMappings = | |
172 this->PPOpts->ExcludedConditionalDirectiveSkipMappings; | |
173 if (ExcludedConditionalDirectiveSkipMappings) | |
174 ExcludedConditionalDirectiveSkipMappings->clear(); | |
175 | |
176 MaxTokens = LangOpts.MaxTokens; | |
177 } | |
178 | |
179 Preprocessor::~Preprocessor() { | |
180 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); | |
181 | |
182 IncludeMacroStack.clear(); | |
183 | |
184 // Destroy any macro definitions. | |
185 while (MacroInfoChain *I = MIChainHead) { | |
186 MIChainHead = I->Next; | |
187 I->~MacroInfoChain(); | |
188 } | |
189 | |
190 // Free any cached macro expanders. | |
191 // This populates MacroArgCache, so all TokenLexers need to be destroyed | |
192 // before the code below that frees up the MacroArgCache list. | |
193 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr); | |
194 CurTokenLexer.reset(); | |
195 | |
196 // Free any cached MacroArgs. | |
197 for (MacroArgs *ArgList = MacroArgCache; ArgList;) | |
198 ArgList = ArgList->deallocate(); | |
199 | |
200 // Delete the header search info, if we own it. | |
201 if (OwnsHeaderSearch) | |
202 delete &HeaderInfo; | |
203 } | |
204 | |
205 void Preprocessor::Initialize(const TargetInfo &Target, | |
206 const TargetInfo *AuxTarget) { | |
207 assert((!this->Target || this->Target == &Target) && | |
208 "Invalid override of target information"); | |
209 this->Target = &Target; | |
210 | |
211 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) && | |
212 "Invalid override of aux target information."); | |
213 this->AuxTarget = AuxTarget; | |
214 | |
215 // Initialize information about built-ins. | |
216 BuiltinInfo->InitializeTarget(Target, AuxTarget); | |
217 HeaderInfo.setTarget(Target); | |
218 | |
219 // Populate the identifier table with info about keywords for the current language. | |
220 Identifiers.AddKeywords(LangOpts); | |
221 } | |
222 | |
223 void Preprocessor::InitializeForModelFile() { | |
224 NumEnteredSourceFiles = 0; | |
225 | |
226 // Reset pragmas | |
227 PragmaHandlersBackup = std::move(PragmaHandlers); | |
228 PragmaHandlers = std::make_unique<PragmaNamespace>(StringRef()); | |
229 RegisterBuiltinPragmas(); | |
230 | |
231 // Reset PredefinesFileID | |
232 PredefinesFileID = FileID(); | |
233 } | |
234 | |
235 void Preprocessor::FinalizeForModelFile() { | |
236 NumEnteredSourceFiles = 1; | |
237 | |
238 PragmaHandlers = std::move(PragmaHandlersBackup); | |
239 } | |
240 | |
241 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const { | |
242 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '" | |
243 << getSpelling(Tok) << "'"; | |
244 | |
245 if (!DumpFlags) return; | |
246 | |
247 llvm::errs() << "\t"; | |
248 if (Tok.isAtStartOfLine()) | |
249 llvm::errs() << " [StartOfLine]"; | |
250 if (Tok.hasLeadingSpace()) | |
251 llvm::errs() << " [LeadingSpace]"; | |
252 if (Tok.isExpandDisabled()) | |
253 llvm::errs() << " [ExpandDisabled]"; | |
254 if (Tok.needsCleaning()) { | |
255 const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); | |
256 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) | |
257 << "']"; | |
258 } | |
259 | |
260 llvm::errs() << "\tLoc=<"; | |
261 DumpLocation(Tok.getLocation()); | |
262 llvm::errs() << ">"; | |
263 } | |
264 | |
265 void Preprocessor::DumpLocation(SourceLocation Loc) const { | |
266 Loc.print(llvm::errs(), SourceMgr); | |
267 } | |
268 | |
269 void Preprocessor::DumpMacro(const MacroInfo &MI) const { | |
270 llvm::errs() << "MACRO: "; | |
271 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { | |
272 DumpToken(MI.getReplacementToken(i)); | |
273 llvm::errs() << " "; | |
274 } | |
275 llvm::errs() << "\n"; | |
276 } | |
277 | |
278 void Preprocessor::PrintStats() { | |
279 llvm::errs() << "\n*** Preprocessor Stats:\n"; | |
280 llvm::errs() << NumDirectives << " directives found:\n"; | |
281 llvm::errs() << " " << NumDefined << " #define.\n"; | |
282 llvm::errs() << " " << NumUndefined << " #undef.\n"; | |
283 llvm::errs() << " #include/#include_next/#import:\n"; | |
284 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n"; | |
285 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n"; | |
286 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n"; | |
287 llvm::errs() << " " << NumElse << " #else/#elif.\n"; | |
288 llvm::errs() << " " << NumEndif << " #endif.\n"; | |
289 llvm::errs() << " " << NumPragma << " #pragma.\n"; | |
290 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; | |
291 | |
292 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" | |
293 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " | |
294 << NumFastMacroExpanded << " on the fast path.\n"; | |
295 llvm::errs() << (NumFastTokenPaste+NumTokenPaste) | |
296 << " token paste (##) operations performed, " | |
297 << NumFastTokenPaste << " on the fast path.\n"; | |
298 | |
299 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total"; | |
300 | |
301 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory(); | |
302 llvm::errs() << "\n Macro Expanded Tokens: " | |
303 << llvm::capacity_in_bytes(MacroExpandedTokens); | |
304 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity(); | |
305 // FIXME: List information for all submodules. | |
306 llvm::errs() << "\n Macros: " | |
307 << llvm::capacity_in_bytes(CurSubmoduleState->Macros); | |
308 llvm::errs() << "\n #pragma push_macro Info: " | |
309 << llvm::capacity_in_bytes(PragmaPushMacroInfo); | |
310 llvm::errs() << "\n Poison Reasons: " | |
311 << llvm::capacity_in_bytes(PoisonReasons); | |
312 llvm::errs() << "\n Comment Handlers: " | |
313 << llvm::capacity_in_bytes(CommentHandlers) << "\n"; | |
314 } | |
315 | |
316 Preprocessor::macro_iterator | |
317 Preprocessor::macro_begin(bool IncludeExternalMacros) const { | |
318 if (IncludeExternalMacros && ExternalSource && | |
319 !ReadMacrosFromExternalSource) { | |
320 ReadMacrosFromExternalSource = true; | |
321 ExternalSource->ReadDefinedMacros(); | |
322 } | |
323 | |
324 // Make sure we cover all macros in visible modules. | |
325 for (const ModuleMacro &Macro : ModuleMacros) | |
326 CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState())); | |
327 | |
328 return CurSubmoduleState->Macros.begin(); | |
329 } | |
330 | |
331 size_t Preprocessor::getTotalMemory() const { | |
332 return BP.getTotalMemory() | |
333 + llvm::capacity_in_bytes(MacroExpandedTokens) | |
334 + Predefines.capacity() /* Predefines buffer. */ | |
335 // FIXME: Include sizes from all submodules, and include MacroInfo sizes, | |
336 // and ModuleMacros. | |
337 + llvm::capacity_in_bytes(CurSubmoduleState->Macros) | |
338 + llvm::capacity_in_bytes(PragmaPushMacroInfo) | |
339 + llvm::capacity_in_bytes(PoisonReasons) | |
340 + llvm::capacity_in_bytes(CommentHandlers); | |
341 } | |
342 | |
343 Preprocessor::macro_iterator | |
344 Preprocessor::macro_end(bool IncludeExternalMacros) const { | |
345 if (IncludeExternalMacros && ExternalSource && | |
346 !ReadMacrosFromExternalSource) { | |
347 ReadMacrosFromExternalSource = true; | |
348 ExternalSource->ReadDefinedMacros(); | |
349 } | |
350 | |
351 return CurSubmoduleState->Macros.end(); | |
352 } | |
353 | |
354 /// Compares macro tokens with a specified token value sequence. | |
355 static bool MacroDefinitionEquals(const MacroInfo *MI, | |
356 ArrayRef<TokenValue> Tokens) { | |
357 return Tokens.size() == MI->getNumTokens() && | |
358 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin()); | |
359 } | |
360 | |
361 StringRef Preprocessor::getLastMacroWithSpelling( | |
362 SourceLocation Loc, | |
363 ArrayRef<TokenValue> Tokens) const { | |
364 SourceLocation BestLocation; | |
365 StringRef BestSpelling; | |
366 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end(); | |
367 I != E; ++I) { | |
368 const MacroDirective::DefInfo | |
369 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr); | |
370 if (!Def || !Def.getMacroInfo()) | |
371 continue; | |
372 if (!Def.getMacroInfo()->isObjectLike()) | |
373 continue; | |
374 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens)) | |
375 continue; | |
376 SourceLocation Location = Def.getLocation(); | |
377 // Choose the macro defined latest. | |
378 if (BestLocation.isInvalid() || | |
379 (Location.isValid() && | |
380 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) { | |
381 BestLocation = Location; | |
382 BestSpelling = I->first->getName(); | |
383 } | |
384 } | |
385 return BestSpelling; | |
386 } | |
387 | |
388 void Preprocessor::recomputeCurLexerKind() { | |
389 if (CurLexer) | |
390 CurLexerKind = CLK_Lexer; | |
391 else if (CurTokenLexer) | |
392 CurLexerKind = CLK_TokenLexer; | |
393 else | |
394 CurLexerKind = CLK_CachingLexer; | |
395 } | |
396 | |
397 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, | |
398 unsigned CompleteLine, | |
399 unsigned CompleteColumn) { | |
400 assert(File); | |
401 assert(CompleteLine && CompleteColumn && "Starts from 1:1"); | |
402 assert(!CodeCompletionFile && "Already set"); | |
403 | |
404 using llvm::MemoryBuffer; | |
405 | |
406 // Load the actual file's contents. | |
407 bool Invalid = false; | |
408 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid); | |
409 if (Invalid) | |
410 return true; | |
411 | |
412 // Find the byte position of the truncation point. | |
413 const char *Position = Buffer->getBufferStart(); | |
414 for (unsigned Line = 1; Line < CompleteLine; ++Line) { | |
415 for (; *Position; ++Position) { | |
416 if (*Position != '\r' && *Position != '\n') | |
417 continue; | |
418 | |
419 // Eat \r\n or \n\r as a single line. | |
420 if ((Position[1] == '\r' || Position[1] == '\n') && | |
421 Position[0] != Position[1]) | |
422 ++Position; | |
423 ++Position; | |
424 break; | |
425 } | |
426 } | |
427 | |
428 Position += CompleteColumn - 1; | |
429 | |
430 // If pointing inside the preamble, adjust the position at the beginning of | |
431 // the file after the preamble. | |
432 if (SkipMainFilePreamble.first && | |
433 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) { | |
434 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first) | |
435 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first; | |
436 } | |
437 | |
438 if (Position > Buffer->getBufferEnd()) | |
439 Position = Buffer->getBufferEnd(); | |
440 | |
441 CodeCompletionFile = File; | |
442 CodeCompletionOffset = Position - Buffer->getBufferStart(); | |
443 | |
444 auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer( | |
445 Buffer->getBufferSize() + 1, Buffer->getBufferIdentifier()); | |
446 char *NewBuf = NewBuffer->getBufferStart(); | |
447 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf); | |
448 *NewPos = '\0'; | |
449 std::copy(Position, Buffer->getBufferEnd(), NewPos+1); | |
450 SourceMgr.overrideFileContents(File, std::move(NewBuffer)); | |
451 | |
452 return false; | |
453 } | |
454 | |
455 void Preprocessor::CodeCompleteIncludedFile(llvm::StringRef Dir, | |
456 bool IsAngled) { | |
457 if (CodeComplete) | |
458 CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled); | |
459 setCodeCompletionReached(); | |
460 } | |
461 | |
462 void Preprocessor::CodeCompleteNaturalLanguage() { | |
463 if (CodeComplete) | |
464 CodeComplete->CodeCompleteNaturalLanguage(); | |
465 setCodeCompletionReached(); | |
466 } | |
467 | |
468 /// getSpelling - This method is used to get the spelling of a token into a | |
469 /// SmallVector. Note that the returned StringRef may not point to the | |
470 /// supplied buffer if a copy can be avoided. | |
471 StringRef Preprocessor::getSpelling(const Token &Tok, | |
472 SmallVectorImpl<char> &Buffer, | |
473 bool *Invalid) const { | |
474 // NOTE: this has to be checked *before* testing for an IdentifierInfo. | |
475 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) { | |
476 // Try the fast path. | |
477 if (const IdentifierInfo *II = Tok.getIdentifierInfo()) | |
478 return II->getName(); | |
479 } | |
480 | |
481 // Resize the buffer if we need to copy into it. | |
482 if (Tok.needsCleaning()) | |
483 Buffer.resize(Tok.getLength()); | |
484 | |
485 const char *Ptr = Buffer.data(); | |
486 unsigned Len = getSpelling(Tok, Ptr, Invalid); | |
487 return StringRef(Ptr, Len); | |
488 } | |
489 | |
490 /// CreateString - Plop the specified string into a scratch buffer and return a | |
491 /// location for it. If specified, the source location provides a source | |
492 /// location for the token. | |
493 void Preprocessor::CreateString(StringRef Str, Token &Tok, | |
494 SourceLocation ExpansionLocStart, | |
495 SourceLocation ExpansionLocEnd) { | |
496 Tok.setLength(Str.size()); | |
497 | |
498 const char *DestPtr; | |
499 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr); | |
500 | |
501 if (ExpansionLocStart.isValid()) | |
502 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart, | |
503 ExpansionLocEnd, Str.size()); | |
504 Tok.setLocation(Loc); | |
505 | |
506 // If this is a raw identifier or a literal token, set the pointer data. | |
507 if (Tok.is(tok::raw_identifier)) | |
508 Tok.setRawIdentifierData(DestPtr); | |
509 else if (Tok.isLiteral()) | |
510 Tok.setLiteralData(DestPtr); | |
511 } | |
512 | |
513 SourceLocation Preprocessor::SplitToken(SourceLocation Loc, unsigned Length) { | |
514 auto &SM = getSourceManager(); | |
515 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc); | |
516 std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(SpellingLoc); | |
517 bool Invalid = false; | |
518 StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); | |
519 if (Invalid) | |
520 return SourceLocation(); | |
521 | |
522 // FIXME: We could consider re-using spelling for tokens we see repeatedly. | |
523 const char *DestPtr; | |
524 SourceLocation Spelling = | |
525 ScratchBuf->getToken(Buffer.data() + LocInfo.second, Length, DestPtr); | |
526 return SM.createTokenSplitLoc(Spelling, Loc, Loc.getLocWithOffset(Length)); | |
527 } | |
528 | |
529 Module *Preprocessor::getCurrentModule() { | |
530 if (!getLangOpts().isCompilingModule()) | |
531 return nullptr; | |
532 | |
533 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule); | |
534 } | |
535 | |
536 //===----------------------------------------------------------------------===// | |
537 // Preprocessor Initialization Methods | |
538 //===----------------------------------------------------------------------===// | |
539 | |
540 /// EnterMainSourceFile - Enter the specified FileID as the main source file, | |
541 /// which implicitly adds the builtin defines etc. | |
542 void Preprocessor::EnterMainSourceFile() { | |
543 // We do not allow the preprocessor to reenter the main file. Doing so will | |
544 // cause FileID's to accumulate information from both runs (e.g. #line | |
545 // information) and predefined macros aren't guaranteed to be set properly. | |
546 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); | |
547 FileID MainFileID = SourceMgr.getMainFileID(); | |
548 | |
549 // If MainFileID is loaded it means we loaded an AST file, no need to enter | |
550 // a main file. | |
551 if (!SourceMgr.isLoadedFileID(MainFileID)) { | |
552 // Enter the main file source buffer. | |
553 EnterSourceFile(MainFileID, nullptr, SourceLocation()); | |
554 | |
555 // If we've been asked to skip bytes in the main file (e.g., as part of a | |
556 // precompiled preamble), do so now. | |
557 if (SkipMainFilePreamble.first > 0) | |
558 CurLexer->SetByteOffset(SkipMainFilePreamble.first, | |
559 SkipMainFilePreamble.second); | |
560 | |
561 // Tell the header info that the main file was entered. If the file is later | |
562 // #imported, it won't be re-entered. | |
563 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) | |
564 HeaderInfo.IncrementIncludeCount(FE); | |
565 } | |
566 | |
567 // Preprocess Predefines to populate the initial preprocessor state. | |
568 std::unique_ptr<llvm::MemoryBuffer> SB = | |
569 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>"); | |
570 assert(SB && "Cannot create predefined source buffer"); | |
571 FileID FID = SourceMgr.createFileID(std::move(SB)); | |
572 assert(FID.isValid() && "Could not create FileID for predefines?"); | |
573 setPredefinesFileID(FID); | |
574 | |
575 // Start parsing the predefines. | |
576 EnterSourceFile(FID, nullptr, SourceLocation()); | |
577 | |
578 if (!PPOpts->PCHThroughHeader.empty()) { | |
579 // Lookup and save the FileID for the through header. If it isn't found | |
580 // in the search path, it's a fatal error. | |
581 const DirectoryLookup *CurDir; | |
582 Optional<FileEntryRef> File = LookupFile( | |
583 SourceLocation(), PPOpts->PCHThroughHeader, | |
584 /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr, CurDir, | |
585 /*SearchPath=*/nullptr, /*RelativePath=*/nullptr, | |
586 /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr, | |
587 /*IsFrameworkFound=*/nullptr); | |
588 if (!File) { | |
589 Diag(SourceLocation(), diag::err_pp_through_header_not_found) | |
590 << PPOpts->PCHThroughHeader; | |
591 return; | |
592 } | |
593 setPCHThroughHeaderFileID( | |
594 SourceMgr.createFileID(*File, SourceLocation(), SrcMgr::C_User)); | |
595 } | |
596 | |
597 // Skip tokens from the Predefines and if needed the main file. | |
598 if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) || | |
599 (usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop)) | |
600 SkipTokensWhileUsingPCH(); | |
601 } | |
602 | |
603 void Preprocessor::setPCHThroughHeaderFileID(FileID FID) { | |
604 assert(PCHThroughHeaderFileID.isInvalid() && | |
605 "PCHThroughHeaderFileID already set!"); | |
606 PCHThroughHeaderFileID = FID; | |
607 } | |
608 | |
609 bool Preprocessor::isPCHThroughHeader(const FileEntry *FE) { | |
610 assert(PCHThroughHeaderFileID.isValid() && | |
611 "Invalid PCH through header FileID"); | |
612 return FE == SourceMgr.getFileEntryForID(PCHThroughHeaderFileID); | |
613 } | |
614 | |
615 bool Preprocessor::creatingPCHWithThroughHeader() { | |
616 return TUKind == TU_Prefix && !PPOpts->PCHThroughHeader.empty() && | |
617 PCHThroughHeaderFileID.isValid(); | |
618 } | |
619 | |
620 bool Preprocessor::usingPCHWithThroughHeader() { | |
621 return TUKind != TU_Prefix && !PPOpts->PCHThroughHeader.empty() && | |
622 PCHThroughHeaderFileID.isValid(); | |
623 } | |
624 | |
625 bool Preprocessor::creatingPCHWithPragmaHdrStop() { | |
626 return TUKind == TU_Prefix && PPOpts->PCHWithHdrStop; | |
627 } | |
628 | |
629 bool Preprocessor::usingPCHWithPragmaHdrStop() { | |
630 return TUKind != TU_Prefix && PPOpts->PCHWithHdrStop; | |
631 } | |
632 | |
633 /// Skip tokens until after the #include of the through header or | |
634 /// until after a #pragma hdrstop is seen. Tokens in the predefines file | |
635 /// and the main file may be skipped. If the end of the predefines file | |
636 /// is reached, skipping continues into the main file. If the end of the | |
637 /// main file is reached, it's a fatal error. | |
638 void Preprocessor::SkipTokensWhileUsingPCH() { | |
639 bool ReachedMainFileEOF = false; | |
640 bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader; | |
641 bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop; | |
642 Token Tok; | |
643 while (true) { | |
644 bool InPredefines = | |
645 (CurLexer && CurLexer->getFileID() == getPredefinesFileID()); | |
646 switch (CurLexerKind) { | |
647 case CLK_Lexer: | |
648 CurLexer->Lex(Tok); | |
649 break; | |
650 case CLK_TokenLexer: | |
651 CurTokenLexer->Lex(Tok); | |
652 break; | |
653 case CLK_CachingLexer: | |
654 CachingLex(Tok); | |
655 break; | |
656 case CLK_LexAfterModuleImport: | |
657 LexAfterModuleImport(Tok); | |
658 break; | |
659 } | |
660 if (Tok.is(tok::eof) && !InPredefines) { | |
661 ReachedMainFileEOF = true; | |
662 break; | |
663 } | |
664 if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader) | |
665 break; | |
666 if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop) | |
667 break; | |
668 } | |
669 if (ReachedMainFileEOF) { | |
670 if (UsingPCHThroughHeader) | |
671 Diag(SourceLocation(), diag::err_pp_through_header_not_seen) | |
672 << PPOpts->PCHThroughHeader << 1; | |
673 else if (!PPOpts->PCHWithHdrStopCreate) | |
674 Diag(SourceLocation(), diag::err_pp_pragma_hdrstop_not_seen); | |
675 } | |
676 } | |
677 | |
678 void Preprocessor::replayPreambleConditionalStack() { | |
679 // Restore the conditional stack from the preamble, if there is one. | |
680 if (PreambleConditionalStack.isReplaying()) { | |
681 assert(CurPPLexer && | |
682 "CurPPLexer is null when calling replayPreambleConditionalStack."); | |
683 CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack()); | |
684 PreambleConditionalStack.doneReplaying(); | |
685 if (PreambleConditionalStack.reachedEOFWhileSkipping()) | |
686 SkipExcludedConditionalBlock( | |
687 PreambleConditionalStack.SkipInfo->HashTokenLoc, | |
688 PreambleConditionalStack.SkipInfo->IfTokenLoc, | |
689 PreambleConditionalStack.SkipInfo->FoundNonSkipPortion, | |
690 PreambleConditionalStack.SkipInfo->FoundElse, | |
691 PreambleConditionalStack.SkipInfo->ElseLoc); | |
692 } | |
693 } | |
694 | |
695 void Preprocessor::EndSourceFile() { | |
696 // Notify the client that we reached the end of the source file. | |
697 if (Callbacks) | |
698 Callbacks->EndOfMainFile(); | |
699 } | |
700 | |
701 //===----------------------------------------------------------------------===// | |
702 // Lexer Event Handling. | |
703 //===----------------------------------------------------------------------===// | |
704 | |
705 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the | |
706 /// identifier information for the token and install it into the token, | |
707 /// updating the token kind accordingly. | |
708 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { | |
709 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!"); | |
710 | |
711 // Look up this token, see if it is a macro, or if it is a language keyword. | |
712 IdentifierInfo *II; | |
713 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) { | |
714 // No cleaning needed, just use the characters from the lexed buffer. | |
715 II = getIdentifierInfo(Identifier.getRawIdentifier()); | |
716 } else { | |
717 // Cleaning needed, alloca a buffer, clean into it, then use the buffer. | |
718 SmallString<64> IdentifierBuffer; | |
719 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); | |
720 | |
721 if (Identifier.hasUCN()) { | |
722 SmallString<64> UCNIdentifierBuffer; | |
723 expandUCNs(UCNIdentifierBuffer, CleanedStr); | |
724 II = getIdentifierInfo(UCNIdentifierBuffer); | |
725 } else { | |
726 II = getIdentifierInfo(CleanedStr); | |
727 } | |
728 } | |
729 | |
730 // Update the token info (identifier info and appropriate token kind). | |
731 Identifier.setIdentifierInfo(II); | |
732 if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() && | |
733 getSourceManager().isInSystemHeader(Identifier.getLocation())) | |
734 Identifier.setKind(tok::identifier); | |
735 else | |
736 Identifier.setKind(II->getTokenID()); | |
737 | |
738 return II; | |
739 } | |
740 | |
741 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) { | |
742 PoisonReasons[II] = DiagID; | |
743 } | |
744 | |
745 void Preprocessor::PoisonSEHIdentifiers(bool Poison) { | |
746 assert(Ident__exception_code && Ident__exception_info); | |
747 assert(Ident___exception_code && Ident___exception_info); | |
748 Ident__exception_code->setIsPoisoned(Poison); | |
749 Ident___exception_code->setIsPoisoned(Poison); | |
750 Ident_GetExceptionCode->setIsPoisoned(Poison); | |
751 Ident__exception_info->setIsPoisoned(Poison); | |
752 Ident___exception_info->setIsPoisoned(Poison); | |
753 Ident_GetExceptionInfo->setIsPoisoned(Poison); | |
754 Ident__abnormal_termination->setIsPoisoned(Poison); | |
755 Ident___abnormal_termination->setIsPoisoned(Poison); | |
756 Ident_AbnormalTermination->setIsPoisoned(Poison); | |
757 } | |
758 | |
759 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { | |
760 assert(Identifier.getIdentifierInfo() && | |
761 "Can't handle identifiers without identifier info!"); | |
762 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it = | |
763 PoisonReasons.find(Identifier.getIdentifierInfo()); | |
764 if(it == PoisonReasons.end()) | |
765 Diag(Identifier, diag::err_pp_used_poisoned_id); | |
766 else | |
767 Diag(Identifier,it->second) << Identifier.getIdentifierInfo(); | |
768 } | |
769 | |
770 /// Returns a diagnostic message kind for reporting a future keyword as | |
771 /// appropriate for the identifier and specified language. | |
772 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, | |
773 const LangOptions &LangOpts) { | |
774 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed"); | |
775 | |
776 if (LangOpts.CPlusPlus) | |
777 return llvm::StringSwitch<diag::kind>(II.getName()) | |
778 #define CXX11_KEYWORD(NAME, FLAGS) \ | |
779 .Case(#NAME, diag::warn_cxx11_keyword) | |
173 | 780 #define CXX20_KEYWORD(NAME, FLAGS) \ |
781 .Case(#NAME, diag::warn_cxx20_keyword) | |
150 | 782 #include "clang/Basic/TokenKinds.def" |
173 | 783 // char8_t is not modeled as a CXX20_KEYWORD because it's not |
784 // unconditionally enabled in C++20 mode. (It can be disabled | |
785 // by -fno-char8_t.) | |
786 .Case("char8_t", diag::warn_cxx20_keyword) | |
150 | 787 ; |
788 | |
789 llvm_unreachable( | |
790 "Keyword not known to come from a newer Standard or proposed Standard"); | |
791 } | |
792 | |
793 void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const { | |
794 assert(II.isOutOfDate() && "not out of date"); | |
795 getExternalSource()->updateOutOfDateIdentifier(II); | |
796 } | |
797 | |
798 /// HandleIdentifier - This callback is invoked when the lexer reads an | |
799 /// identifier. This callback looks up the identifier in the map and/or | |
800 /// potentially macro expands it or turns it into a named token (like 'for'). | |
801 /// | |
802 /// Note that callers of this method are guarded by checking the | |
803 /// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the | |
804 /// IdentifierInfo methods that compute these properties will need to change to | |
805 /// match. | |
806 bool Preprocessor::HandleIdentifier(Token &Identifier) { | |
807 assert(Identifier.getIdentifierInfo() && | |
808 "Can't handle identifiers without identifier info!"); | |
809 | |
810 IdentifierInfo &II = *Identifier.getIdentifierInfo(); | |
811 | |
812 // If the information about this identifier is out of date, update it from | |
813 // the external source. | |
814 // We have to treat __VA_ARGS__ in a special way, since it gets | |
815 // serialized with isPoisoned = true, but our preprocessor may have | |
816 // unpoisoned it if we're defining a C99 macro. | |
817 if (II.isOutOfDate()) { | |
818 bool CurrentIsPoisoned = false; | |
819 const bool IsSpecialVariadicMacro = | |
820 &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__; | |
821 if (IsSpecialVariadicMacro) | |
822 CurrentIsPoisoned = II.isPoisoned(); | |
823 | |
824 updateOutOfDateIdentifier(II); | |
825 Identifier.setKind(II.getTokenID()); | |
826 | |
827 if (IsSpecialVariadicMacro) | |
828 II.setIsPoisoned(CurrentIsPoisoned); | |
829 } | |
830 | |
831 // If this identifier was poisoned, and if it was not produced from a macro | |
832 // expansion, emit an error. | |
833 if (II.isPoisoned() && CurPPLexer) { | |
834 HandlePoisonedIdentifier(Identifier); | |
835 } | |
836 | |
837 // If this is a macro to be expanded, do it. | |
838 if (MacroDefinition MD = getMacroDefinition(&II)) { | |
839 auto *MI = MD.getMacroInfo(); | |
840 assert(MI && "macro definition with no macro info?"); | |
841 if (!DisableMacroExpansion) { | |
842 if (!Identifier.isExpandDisabled() && MI->isEnabled()) { | |
843 // C99 6.10.3p10: If the preprocessing token immediately after the | |
844 // macro name isn't a '(', this macro should not be expanded. | |
845 if (!MI->isFunctionLike() || isNextPPTokenLParen()) | |
846 return HandleMacroExpandedIdentifier(Identifier, MD); | |
847 } else { | |
848 // C99 6.10.3.4p2 says that a disabled macro may never again be | |
849 // expanded, even if it's in a context where it could be expanded in the | |
850 // future. | |
851 Identifier.setFlag(Token::DisableExpand); | |
852 if (MI->isObjectLike() || isNextPPTokenLParen()) | |
853 Diag(Identifier, diag::pp_disabled_macro_expansion); | |
854 } | |
855 } | |
856 } | |
857 | |
858 // If this identifier is a keyword in a newer Standard or proposed Standard, | |
859 // produce a warning. Don't warn if we're not considering macro expansion, | |
860 // since this identifier might be the name of a macro. | |
861 // FIXME: This warning is disabled in cases where it shouldn't be, like | |
862 // "#define constexpr constexpr", "int constexpr;" | |
863 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) { | |
864 Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts())) | |
865 << II.getName(); | |
866 // Don't diagnose this keyword again in this translation unit. | |
867 II.setIsFutureCompatKeyword(false); | |
868 } | |
869 | |
870 // If this is an extension token, diagnose its use. | |
871 // We avoid diagnosing tokens that originate from macro definitions. | |
872 // FIXME: This warning is disabled in cases where it shouldn't be, | |
873 // like "#define TY typeof", "TY(1) x". | |
874 if (II.isExtensionToken() && !DisableMacroExpansion) | |
875 Diag(Identifier, diag::ext_token_used); | |
876 | |
877 // If this is the 'import' contextual keyword following an '@', note | |
878 // that the next token indicates a module name. | |
879 // | |
880 // Note that we do not treat 'import' as a contextual | |
881 // keyword when we're in a caching lexer, because caching lexers only get | |
882 // used in contexts where import declarations are disallowed. | |
883 // | |
884 // Likewise if this is the C++ Modules TS import keyword. | |
885 if (((LastTokenWasAt && II.isModulesImport()) || | |
886 Identifier.is(tok::kw_import)) && | |
887 !InMacroArgs && !DisableMacroExpansion && | |
888 (getLangOpts().Modules || getLangOpts().DebuggerSupport) && | |
889 CurLexerKind != CLK_CachingLexer) { | |
890 ModuleImportLoc = Identifier.getLocation(); | |
891 ModuleImportPath.clear(); | |
892 ModuleImportExpectsIdentifier = true; | |
893 CurLexerKind = CLK_LexAfterModuleImport; | |
894 } | |
895 return true; | |
896 } | |
897 | |
898 void Preprocessor::Lex(Token &Result) { | |
899 ++LexLevel; | |
900 | |
901 // We loop here until a lex function returns a token; this avoids recursion. | |
902 bool ReturnedToken; | |
903 do { | |
904 switch (CurLexerKind) { | |
905 case CLK_Lexer: | |
152 | 906 #ifndef noCbC |
907 ReturnedToken = CurLexer->Lex(Result, ProtoParsing); | |
908 #else | |
150 | 909 ReturnedToken = CurLexer->Lex(Result); |
152 | 910 #endif |
150 | 911 break; |
912 case CLK_TokenLexer: | |
913 ReturnedToken = CurTokenLexer->Lex(Result); | |
914 break; | |
915 case CLK_CachingLexer: | |
916 CachingLex(Result); | |
917 ReturnedToken = true; | |
918 break; | |
919 case CLK_LexAfterModuleImport: | |
920 ReturnedToken = LexAfterModuleImport(Result); | |
921 break; | |
922 } | |
923 } while (!ReturnedToken); | |
924 | |
925 if (Result.is(tok::unknown) && TheModuleLoader.HadFatalFailure) | |
926 return; | |
927 | |
928 if (Result.is(tok::code_completion) && Result.getIdentifierInfo()) { | |
929 // Remember the identifier before code completion token. | |
930 setCodeCompletionIdentifierInfo(Result.getIdentifierInfo()); | |
931 setCodeCompletionTokenRange(Result.getLocation(), Result.getEndLoc()); | |
932 // Set IdenfitierInfo to null to avoid confusing code that handles both | |
933 // identifiers and completion tokens. | |
934 Result.setIdentifierInfo(nullptr); | |
935 } | |
936 | |
937 // Update ImportSeqState to track our position within a C++20 import-seq | |
938 // if this token is being produced as a result of phase 4 of translation. | |
939 if (getLangOpts().CPlusPlusModules && LexLevel == 1 && | |
940 !Result.getFlag(Token::IsReinjected)) { | |
941 switch (Result.getKind()) { | |
942 case tok::l_paren: case tok::l_square: case tok::l_brace: | |
943 ImportSeqState.handleOpenBracket(); | |
944 break; | |
945 case tok::r_paren: case tok::r_square: | |
946 ImportSeqState.handleCloseBracket(); | |
947 break; | |
948 case tok::r_brace: | |
949 ImportSeqState.handleCloseBrace(); | |
950 break; | |
951 case tok::semi: | |
952 ImportSeqState.handleSemi(); | |
953 break; | |
954 case tok::header_name: | |
955 case tok::annot_header_unit: | |
956 ImportSeqState.handleHeaderName(); | |
957 break; | |
958 case tok::kw_export: | |
959 ImportSeqState.handleExport(); | |
960 break; | |
961 case tok::identifier: | |
962 if (Result.getIdentifierInfo()->isModulesImport()) { | |
963 ImportSeqState.handleImport(); | |
964 if (ImportSeqState.afterImportSeq()) { | |
965 ModuleImportLoc = Result.getLocation(); | |
966 ModuleImportPath.clear(); | |
967 ModuleImportExpectsIdentifier = true; | |
968 CurLexerKind = CLK_LexAfterModuleImport; | |
969 } | |
970 break; | |
971 } | |
972 LLVM_FALLTHROUGH; | |
973 default: | |
974 ImportSeqState.handleMisc(); | |
975 break; | |
976 } | |
977 } | |
978 | |
979 LastTokenWasAt = Result.is(tok::at); | |
980 --LexLevel; | |
981 | |
982 if (LexLevel == 0 && !Result.getFlag(Token::IsReinjected)) { | |
983 ++TokenCount; | |
984 if (OnToken) | |
985 OnToken(Result); | |
986 } | |
987 } | |
988 | |
989 /// Lex a header-name token (including one formed from header-name-tokens if | |
990 /// \p AllowConcatenation is \c true). | |
991 /// | |
992 /// \param FilenameTok Filled in with the next token. On success, this will | |
993 /// be either a header_name token. On failure, it will be whatever other | |
994 /// token was found instead. | |
995 /// \param AllowMacroExpansion If \c true, allow the header name to be formed | |
996 /// by macro expansion (concatenating tokens as necessary if the first | |
997 /// token is a '<'). | |
998 /// \return \c true if we reached EOD or EOF while looking for a > token in | |
999 /// a concatenated header name and diagnosed it. \c false otherwise. | |
1000 bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) { | |
1001 // Lex using header-name tokenization rules if tokens are being lexed from | |
1002 // a file. Just grab a token normally if we're in a macro expansion. | |
1003 if (CurPPLexer) | |
1004 CurPPLexer->LexIncludeFilename(FilenameTok); | |
1005 else | |
1006 Lex(FilenameTok); | |
1007 | |
1008 // This could be a <foo/bar.h> file coming from a macro expansion. In this | |
1009 // case, glue the tokens together into an angle_string_literal token. | |
1010 SmallString<128> FilenameBuffer; | |
1011 if (FilenameTok.is(tok::less) && AllowMacroExpansion) { | |
1012 bool StartOfLine = FilenameTok.isAtStartOfLine(); | |
1013 bool LeadingSpace = FilenameTok.hasLeadingSpace(); | |
1014 bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro(); | |
1015 | |
1016 SourceLocation Start = FilenameTok.getLocation(); | |
1017 SourceLocation End; | |
1018 FilenameBuffer.push_back('<'); | |
1019 | |
1020 // Consume tokens until we find a '>'. | |
1021 // FIXME: A header-name could be formed starting or ending with an | |
1022 // alternative token. It's not clear whether that's ill-formed in all | |
1023 // cases. | |
1024 while (FilenameTok.isNot(tok::greater)) { | |
1025 Lex(FilenameTok); | |
1026 if (FilenameTok.isOneOf(tok::eod, tok::eof)) { | |
1027 Diag(FilenameTok.getLocation(), diag::err_expected) << tok::greater; | |
1028 Diag(Start, diag::note_matching) << tok::less; | |
1029 return true; | |
1030 } | |
1031 | |
1032 End = FilenameTok.getLocation(); | |
1033 | |
1034 // FIXME: Provide code completion for #includes. | |
1035 if (FilenameTok.is(tok::code_completion)) { | |
1036 setCodeCompletionReached(); | |
1037 Lex(FilenameTok); | |
1038 continue; | |
1039 } | |
1040 | |
1041 // Append the spelling of this token to the buffer. If there was a space | |
1042 // before it, add it now. | |
1043 if (FilenameTok.hasLeadingSpace()) | |
1044 FilenameBuffer.push_back(' '); | |
1045 | |
1046 // Get the spelling of the token, directly into FilenameBuffer if | |
1047 // possible. | |
1048 size_t PreAppendSize = FilenameBuffer.size(); | |
1049 FilenameBuffer.resize(PreAppendSize + FilenameTok.getLength()); | |
1050 | |
1051 const char *BufPtr = &FilenameBuffer[PreAppendSize]; | |
1052 unsigned ActualLen = getSpelling(FilenameTok, BufPtr); | |
1053 | |
1054 // If the token was spelled somewhere else, copy it into FilenameBuffer. | |
1055 if (BufPtr != &FilenameBuffer[PreAppendSize]) | |
1056 memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen); | |
1057 | |
1058 // Resize FilenameBuffer to the correct size. | |
1059 if (FilenameTok.getLength() != ActualLen) | |
1060 FilenameBuffer.resize(PreAppendSize + ActualLen); | |
1061 } | |
1062 | |
1063 FilenameTok.startToken(); | |
1064 FilenameTok.setKind(tok::header_name); | |
1065 FilenameTok.setFlagValue(Token::StartOfLine, StartOfLine); | |
1066 FilenameTok.setFlagValue(Token::LeadingSpace, LeadingSpace); | |
1067 FilenameTok.setFlagValue(Token::LeadingEmptyMacro, LeadingEmptyMacro); | |
1068 CreateString(FilenameBuffer, FilenameTok, Start, End); | |
1069 } else if (FilenameTok.is(tok::string_literal) && AllowMacroExpansion) { | |
1070 // Convert a string-literal token of the form " h-char-sequence " | |
1071 // (produced by macro expansion) into a header-name token. | |
1072 // | |
1073 // The rules for header-names don't quite match the rules for | |
1074 // string-literals, but all the places where they differ result in | |
1075 // undefined behavior, so we can and do treat them the same. | |
1076 // | |
1077 // A string-literal with a prefix or suffix is not translated into a | |
1078 // header-name. This could theoretically be observable via the C++20 | |
1079 // context-sensitive header-name formation rules. | |
1080 StringRef Str = getSpelling(FilenameTok, FilenameBuffer); | |
1081 if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"') | |
1082 FilenameTok.setKind(tok::header_name); | |
1083 } | |
1084 | |
1085 return false; | |
1086 } | |
1087 | |
1088 /// Collect the tokens of a C++20 pp-import-suffix. | |
1089 void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) { | |
1090 // FIXME: For error recovery, consider recognizing attribute syntax here | |
1091 // and terminating / diagnosing a missing semicolon if we find anything | |
1092 // else? (Can we leave that to the parser?) | |
1093 unsigned BracketDepth = 0; | |
1094 while (true) { | |
1095 Toks.emplace_back(); | |
1096 Lex(Toks.back()); | |
1097 | |
1098 switch (Toks.back().getKind()) { | |
1099 case tok::l_paren: case tok::l_square: case tok::l_brace: | |
1100 ++BracketDepth; | |
1101 break; | |
1102 | |
1103 case tok::r_paren: case tok::r_square: case tok::r_brace: | |
1104 if (BracketDepth == 0) | |
1105 return; | |
1106 --BracketDepth; | |
1107 break; | |
1108 | |
1109 case tok::semi: | |
1110 if (BracketDepth == 0) | |
1111 return; | |
1112 break; | |
1113 | |
1114 case tok::eof: | |
1115 return; | |
1116 | |
1117 default: | |
1118 break; | |
1119 } | |
1120 } | |
1121 } | |
1122 | |
1123 | |
1124 /// Lex a token following the 'import' contextual keyword. | |
1125 /// | |
1126 /// pp-import: [C++20] | |
1127 /// import header-name pp-import-suffix[opt] ; | |
1128 /// import header-name-tokens pp-import-suffix[opt] ; | |
1129 /// [ObjC] @ import module-name ; | |
1130 /// [Clang] import module-name ; | |
1131 /// | |
1132 /// header-name-tokens: | |
1133 /// string-literal | |
1134 /// < [any sequence of preprocessing-tokens other than >] > | |
1135 /// | |
1136 /// module-name: | |
1137 /// module-name-qualifier[opt] identifier | |
1138 /// | |
1139 /// module-name-qualifier | |
1140 /// module-name-qualifier[opt] identifier . | |
1141 /// | |
1142 /// We respond to a pp-import by importing macros from the named module. | |
1143 bool Preprocessor::LexAfterModuleImport(Token &Result) { | |
1144 // Figure out what kind of lexer we actually have. | |
1145 recomputeCurLexerKind(); | |
1146 | |
1147 // Lex the next token. The header-name lexing rules are used at the start of | |
1148 // a pp-import. | |
1149 // | |
1150 // For now, we only support header-name imports in C++20 mode. | |
1151 // FIXME: Should we allow this in all language modes that support an import | |
1152 // declaration as an extension? | |
1153 if (ModuleImportPath.empty() && getLangOpts().CPlusPlusModules) { | |
1154 if (LexHeaderName(Result)) | |
1155 return true; | |
1156 } else { | |
1157 Lex(Result); | |
1158 } | |
1159 | |
1160 // Allocate a holding buffer for a sequence of tokens and introduce it into | |
1161 // the token stream. | |
1162 auto EnterTokens = [this](ArrayRef<Token> Toks) { | |
1163 auto ToksCopy = std::make_unique<Token[]>(Toks.size()); | |
1164 std::copy(Toks.begin(), Toks.end(), ToksCopy.get()); | |
1165 EnterTokenStream(std::move(ToksCopy), Toks.size(), | |
1166 /*DisableMacroExpansion*/ true, /*IsReinject*/ false); | |
1167 }; | |
1168 | |
1169 // Check for a header-name. | |
1170 SmallVector<Token, 32> Suffix; | |
1171 if (Result.is(tok::header_name)) { | |
1172 // Enter the header-name token into the token stream; a Lex action cannot | |
1173 // both return a token and cache tokens (doing so would corrupt the token | |
1174 // cache if the call to Lex comes from CachingLex / PeekAhead). | |
1175 Suffix.push_back(Result); | |
1176 | |
1177 // Consume the pp-import-suffix and expand any macros in it now. We'll add | |
1178 // it back into the token stream later. | |
1179 CollectPpImportSuffix(Suffix); | |
1180 if (Suffix.back().isNot(tok::semi)) { | |
1181 // This is not a pp-import after all. | |
1182 EnterTokens(Suffix); | |
1183 return false; | |
1184 } | |
1185 | |
1186 // C++2a [cpp.module]p1: | |
1187 // The ';' preprocessing-token terminating a pp-import shall not have | |
1188 // been produced by macro replacement. | |
1189 SourceLocation SemiLoc = Suffix.back().getLocation(); | |
1190 if (SemiLoc.isMacroID()) | |
1191 Diag(SemiLoc, diag::err_header_import_semi_in_macro); | |
1192 | |
1193 // Reconstitute the import token. | |
1194 Token ImportTok; | |
1195 ImportTok.startToken(); | |
1196 ImportTok.setKind(tok::kw_import); | |
1197 ImportTok.setLocation(ModuleImportLoc); | |
1198 ImportTok.setIdentifierInfo(getIdentifierInfo("import")); | |
1199 ImportTok.setLength(6); | |
1200 | |
1201 auto Action = HandleHeaderIncludeOrImport( | |
1202 /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc); | |
1203 switch (Action.Kind) { | |
1204 case ImportAction::None: | |
1205 break; | |
1206 | |
1207 case ImportAction::ModuleBegin: | |
1208 // Let the parser know we're textually entering the module. | |
1209 Suffix.emplace_back(); | |
1210 Suffix.back().startToken(); | |
1211 Suffix.back().setKind(tok::annot_module_begin); | |
1212 Suffix.back().setLocation(SemiLoc); | |
1213 Suffix.back().setAnnotationEndLoc(SemiLoc); | |
1214 Suffix.back().setAnnotationValue(Action.ModuleForHeader); | |
1215 LLVM_FALLTHROUGH; | |
1216 | |
1217 case ImportAction::ModuleImport: | |
1218 case ImportAction::SkippedModuleImport: | |
1219 // We chose to import (or textually enter) the file. Convert the | |
1220 // header-name token into a header unit annotation token. | |
1221 Suffix[0].setKind(tok::annot_header_unit); | |
1222 Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation()); | |
1223 Suffix[0].setAnnotationValue(Action.ModuleForHeader); | |
1224 // FIXME: Call the moduleImport callback? | |
1225 break; | |
1226 case ImportAction::Failure: | |
1227 assert(TheModuleLoader.HadFatalFailure && | |
1228 "This should be an early exit only to a fatal error"); | |
1229 Result.setKind(tok::eof); | |
1230 CurLexer->cutOffLexing(); | |
1231 EnterTokens(Suffix); | |
1232 return true; | |
1233 } | |
1234 | |
1235 EnterTokens(Suffix); | |
1236 return false; | |
1237 } | |
1238 | |
1239 // The token sequence | |
1240 // | |
1241 // import identifier (. identifier)* | |
1242 // | |
1243 // indicates a module import directive. We already saw the 'import' | |
1244 // contextual keyword, so now we're looking for the identifiers. | |
1245 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { | |
1246 // We expected to see an identifier here, and we did; continue handling | |
1247 // identifiers. | |
1248 ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(), | |
1249 Result.getLocation())); | |
1250 ModuleImportExpectsIdentifier = false; | |
1251 CurLexerKind = CLK_LexAfterModuleImport; | |
1252 return true; | |
1253 } | |
1254 | |
1255 // If we're expecting a '.' or a ';', and we got a '.', then wait until we | |
1256 // see the next identifier. (We can also see a '[[' that begins an | |
1257 // attribute-specifier-seq here under the C++ Modules TS.) | |
1258 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { | |
1259 ModuleImportExpectsIdentifier = true; | |
1260 CurLexerKind = CLK_LexAfterModuleImport; | |
1261 return true; | |
1262 } | |
1263 | |
1264 // If we didn't recognize a module name at all, this is not a (valid) import. | |
1265 if (ModuleImportPath.empty() || Result.is(tok::eof)) | |
1266 return true; | |
1267 | |
1268 // Consume the pp-import-suffix and expand any macros in it now, if we're not | |
1269 // at the semicolon already. | |
1270 SourceLocation SemiLoc = Result.getLocation(); | |
1271 if (Result.isNot(tok::semi)) { | |
1272 Suffix.push_back(Result); | |
1273 CollectPpImportSuffix(Suffix); | |
1274 if (Suffix.back().isNot(tok::semi)) { | |
1275 // This is not an import after all. | |
1276 EnterTokens(Suffix); | |
1277 return false; | |
1278 } | |
1279 SemiLoc = Suffix.back().getLocation(); | |
1280 } | |
1281 | |
1282 // Under the Modules TS, the dot is just part of the module name, and not | |
1283 // a real hierarchy separator. Flatten such module names now. | |
1284 // | |
1285 // FIXME: Is this the right level to be performing this transformation? | |
1286 std::string FlatModuleName; | |
1287 if (getLangOpts().ModulesTS || getLangOpts().CPlusPlusModules) { | |
1288 for (auto &Piece : ModuleImportPath) { | |
1289 if (!FlatModuleName.empty()) | |
1290 FlatModuleName += "."; | |
1291 FlatModuleName += Piece.first->getName(); | |
1292 } | |
1293 SourceLocation FirstPathLoc = ModuleImportPath[0].second; | |
1294 ModuleImportPath.clear(); | |
1295 ModuleImportPath.push_back( | |
1296 std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc)); | |
1297 } | |
1298 | |
1299 Module *Imported = nullptr; | |
1300 if (getLangOpts().Modules) { | |
1301 Imported = TheModuleLoader.loadModule(ModuleImportLoc, | |
1302 ModuleImportPath, | |
1303 Module::Hidden, | |
1304 /*IsInclusionDirective=*/false); | |
1305 if (Imported) | |
1306 makeModuleVisible(Imported, SemiLoc); | |
1307 } | |
1308 if (Callbacks) | |
1309 Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported); | |
1310 | |
1311 if (!Suffix.empty()) { | |
1312 EnterTokens(Suffix); | |
1313 return false; | |
1314 } | |
1315 return true; | |
1316 } | |
1317 | |
1318 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) { | |
1319 CurSubmoduleState->VisibleModules.setVisible( | |
1320 M, Loc, [](Module *) {}, | |
1321 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) { | |
1322 // FIXME: Include the path in the diagnostic. | |
1323 // FIXME: Include the import location for the conflicting module. | |
1324 Diag(ModuleImportLoc, diag::warn_module_conflict) | |
1325 << Path[0]->getFullModuleName() | |
1326 << Conflict->getFullModuleName() | |
1327 << Message; | |
1328 }); | |
1329 | |
1330 // Add this module to the imports list of the currently-built submodule. | |
1331 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M) | |
1332 BuildingSubmoduleStack.back().M->Imports.insert(M); | |
1333 } | |
1334 | |
1335 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String, | |
1336 const char *DiagnosticTag, | |
1337 bool AllowMacroExpansion) { | |
1338 // We need at least one string literal. | |
1339 if (Result.isNot(tok::string_literal)) { | |
1340 Diag(Result, diag::err_expected_string_literal) | |
1341 << /*Source='in...'*/0 << DiagnosticTag; | |
1342 return false; | |
1343 } | |
1344 | |
1345 // Lex string literal tokens, optionally with macro expansion. | |
1346 SmallVector<Token, 4> StrToks; | |
1347 do { | |
1348 StrToks.push_back(Result); | |
1349 | |
1350 if (Result.hasUDSuffix()) | |
1351 Diag(Result, diag::err_invalid_string_udl); | |
1352 | |
1353 if (AllowMacroExpansion) | |
1354 Lex(Result); | |
1355 else | |
1356 LexUnexpandedToken(Result); | |
1357 } while (Result.is(tok::string_literal)); | |
1358 | |
1359 // Concatenate and parse the strings. | |
1360 StringLiteralParser Literal(StrToks, *this); | |
1361 assert(Literal.isAscii() && "Didn't allow wide strings in"); | |
1362 | |
1363 if (Literal.hadError) | |
1364 return false; | |
1365 | |
1366 if (Literal.Pascal) { | |
1367 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal) | |
1368 << /*Source='in...'*/0 << DiagnosticTag; | |
1369 return false; | |
1370 } | |
1371 | |
1372 String = std::string(Literal.GetString()); | |
1373 return true; | |
1374 } | |
1375 | |
1376 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) { | |
1377 assert(Tok.is(tok::numeric_constant)); | |
1378 SmallString<8> IntegerBuffer; | |
1379 bool NumberInvalid = false; | |
1380 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid); | |
1381 if (NumberInvalid) | |
1382 return false; | |
1383 NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this); | |
1384 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix()) | |
1385 return false; | |
1386 llvm::APInt APVal(64, 0); | |
1387 if (Literal.GetIntegerValue(APVal)) | |
1388 return false; | |
1389 Lex(Tok); | |
1390 Value = APVal.getLimitedValue(); | |
1391 return true; | |
1392 } | |
1393 | |
1394 void Preprocessor::addCommentHandler(CommentHandler *Handler) { | |
1395 assert(Handler && "NULL comment handler"); | |
1396 assert(llvm::find(CommentHandlers, Handler) == CommentHandlers.end() && | |
1397 "Comment handler already registered"); | |
1398 CommentHandlers.push_back(Handler); | |
1399 } | |
1400 | |
1401 void Preprocessor::removeCommentHandler(CommentHandler *Handler) { | |
1402 std::vector<CommentHandler *>::iterator Pos = | |
1403 llvm::find(CommentHandlers, Handler); | |
1404 assert(Pos != CommentHandlers.end() && "Comment handler not registered"); | |
1405 CommentHandlers.erase(Pos); | |
1406 } | |
1407 | |
1408 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) { | |
1409 bool AnyPendingTokens = false; | |
1410 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(), | |
1411 HEnd = CommentHandlers.end(); | |
1412 H != HEnd; ++H) { | |
1413 if ((*H)->HandleComment(*this, Comment)) | |
1414 AnyPendingTokens = true; | |
1415 } | |
1416 if (!AnyPendingTokens || getCommentRetentionState()) | |
1417 return false; | |
1418 Lex(result); | |
1419 return true; | |
1420 } | |
1421 | |
// Out-of-line definitions of the defaulted destructors of ModuleLoader,
// CommentHandler and CodeCompletionHandler.
// NOTE(review): presumably defined in this file so that each class has a
// single translation unit holding its destructor - confirm against the class
// declarations.
1422 ModuleLoader::~ModuleLoader() = default; | |
1423 | |
1424 CommentHandler::~CommentHandler() = default; | |
1425 | |
1426 CodeCompletionHandler::~CodeCompletionHandler() = default; | |
1427 | |
1428 void Preprocessor::createPreprocessingRecord() { | |
1429 if (Record) | |
1430 return; | |
1431 | |
1432 Record = new PreprocessingRecord(getSourceManager()); | |
1433 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record)); | |
1434 } | |
197 | 1435 |
1436 #ifndef noCbC | |
1437 | |
198
cef006dc7fd5
CurLexer vanish after EnterMacro finish
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
197
diff
changeset
|
1438 Token Preprocessor::ReadFromString(const char *src , SourceLocation Loc) { |
197 | 1439 // Push the ( "string" ) tokens into the token stream. |
198
cef006dc7fd5
CurLexer vanish after EnterMacro finish
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
197
diff
changeset
|
1440 MacroInfo *MI = AllocateMacroInfo(Loc); |
cef006dc7fd5
CurLexer vanish after EnterMacro finish
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
197
diff
changeset
|
1441 Token Tok,FirstTok,Result; |
cef006dc7fd5
CurLexer vanish after EnterMacro finish
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
197
diff
changeset
|
1442 bool Invalid = false; |
cef006dc7fd5
CurLexer vanish after EnterMacro finish
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
197
diff
changeset
|
1443 Lexer lx(CurLexer->getFileID(),getSourceManager().getBuffer(CurLexer->getFileID(), Loc, &Invalid),*this); |
cef006dc7fd5
CurLexer vanish after EnterMacro finish
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
197
diff
changeset
|
1444 lx.InitLexer(src,src,src + strlen(src)); |
199 | 1445 lx.ParsingPreprocessorDirective = true ; // to prevent from EOF sucide of CurLexer in PP |
198
cef006dc7fd5
CurLexer vanish after EnterMacro finish
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
197
diff
changeset
|
1446 lx.Lex(Tok); |
cef006dc7fd5
CurLexer vanish after EnterMacro finish
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
197
diff
changeset
|
1447 FirstTok = Tok; |
cef006dc7fd5
CurLexer vanish after EnterMacro finish
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
197
diff
changeset
|
1448 while (Tok.getKind() != tok::TokenKind::eof) { |
cef006dc7fd5
CurLexer vanish after EnterMacro finish
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
197
diff
changeset
|
1449 MI->AddTokenToBody(Tok); |
cef006dc7fd5
CurLexer vanish after EnterMacro finish
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
197
diff
changeset
|
1450 lx.Lex(Tok); |
cef006dc7fd5
CurLexer vanish after EnterMacro finish
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
197
diff
changeset
|
1451 } |
cef006dc7fd5
CurLexer vanish after EnterMacro finish
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
197
diff
changeset
|
1452 EnterMacro(Result, Loc, MI , 0 ); |
199 | 1453 // EnterTokenStream(&MI->ReplacementTokens,MI->getNumTokens(),true,true,false); |
197 | 1454 return Result; |
1455 } | |
1456 | |
1457 #endif |