150
|
1 //===- DependencyDirectivesSourceMinimizer.cpp - -------------------------===//
|
|
2 //
|
|
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
4 // See https://llvm.org/LICENSE.txt for license information.
|
|
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
6 //
|
|
7 //===----------------------------------------------------------------------===//
|
|
8 ///
|
|
9 /// \file
|
|
10 /// This is the implementation for minimizing header and source files to the
|
|
11 /// minimum necessary preprocessor directives for evaluating includes. It
|
|
12 /// reduces the source down to #define, #include, #import, @import, and any
|
|
13 /// conditional preprocessor logic that contains one of those.
|
|
14 ///
|
|
15 //===----------------------------------------------------------------------===//
|
|
16
|
|
17 #include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
|
|
18 #include "clang/Basic/CharInfo.h"
|
|
19 #include "clang/Basic/Diagnostic.h"
|
|
20 #include "clang/Lex/LexDiagnostic.h"
|
173
|
21 #include "llvm/ADT/StringMap.h"
|
150
|
22 #include "llvm/ADT/StringSwitch.h"
|
|
23 #include "llvm/Support/MemoryBuffer.h"
|
|
24
|
|
25 using namespace llvm;
|
|
26 using namespace clang;
|
|
27 using namespace clang::minimize_source_to_dependency_directives;
|
|
28
|
|
29 namespace {
|
|
30
|
|
31 struct Minimizer {
|
|
32 /// Minimized output.
|
|
33 SmallVectorImpl<char> &Out;
|
|
34 /// The known tokens encountered during the minimization.
|
|
35 SmallVectorImpl<Token> &Tokens;
|
|
36
|
|
37 Minimizer(SmallVectorImpl<char> &Out, SmallVectorImpl<Token> &Tokens,
|
|
38 StringRef Input, DiagnosticsEngine *Diags,
|
|
39 SourceLocation InputSourceLoc)
|
|
40 : Out(Out), Tokens(Tokens), Input(Input), Diags(Diags),
|
|
41 InputSourceLoc(InputSourceLoc) {}
|
|
42
|
|
43 /// Lex the provided source and emit the minimized output.
|
|
44 ///
|
|
45 /// \returns True on error.
|
|
46 bool minimize();
|
|
47
|
|
48 private:
|
|
49 struct IdInfo {
|
|
50 const char *Last;
|
|
51 StringRef Name;
|
|
52 };
|
|
53
|
|
54 /// Lex an identifier.
|
|
55 ///
|
|
56 /// \pre First points at a valid identifier head.
|
|
57 LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End);
|
|
58 LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First,
|
|
59 const char *const End);
|
|
60 LLVM_NODISCARD bool minimizeImpl(const char *First, const char *const End);
|
|
61 LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End);
|
|
62 LLVM_NODISCARD bool lexAt(const char *&First, const char *const End);
|
|
63 LLVM_NODISCARD bool lexModule(const char *&First, const char *const End);
|
|
64 LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End);
|
|
65 LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End);
|
|
66 LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End);
|
|
67 LLVM_NODISCARD bool lexDefault(TokenKind Kind, StringRef Directive,
|
|
68 const char *&First, const char *const End);
|
|
69 Token &makeToken(TokenKind K) {
|
|
70 Tokens.emplace_back(K, Out.size());
|
|
71 return Tokens.back();
|
|
72 }
|
|
73 void popToken() {
|
|
74 Out.resize(Tokens.back().Offset);
|
|
75 Tokens.pop_back();
|
|
76 }
|
|
77 TokenKind top() const { return Tokens.empty() ? pp_none : Tokens.back().K; }
|
|
78
|
|
79 Minimizer &put(char Byte) {
|
|
80 Out.push_back(Byte);
|
|
81 return *this;
|
|
82 }
|
|
83 Minimizer &append(StringRef S) { return append(S.begin(), S.end()); }
|
|
84 Minimizer &append(const char *First, const char *Last) {
|
|
85 Out.append(First, Last);
|
|
86 return *this;
|
|
87 }
|
|
88
|
|
89 void printToNewline(const char *&First, const char *const End);
|
|
90 void printAdjacentModuleNameParts(const char *&First, const char *const End);
|
|
91 LLVM_NODISCARD bool printAtImportBody(const char *&First,
|
|
92 const char *const End);
|
|
93 void printDirectiveBody(const char *&First, const char *const End);
|
|
94 void printAdjacentMacroArgs(const char *&First, const char *const End);
|
|
95 LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End);
|
|
96
|
|
97 /// Reports a diagnostic if the diagnostic engine is provided. Always returns
|
|
98 /// true at the end.
|
|
99 bool reportError(const char *CurPtr, unsigned Err);
|
|
100
|
|
101 StringMap<char> SplitIds;
|
|
102 StringRef Input;
|
|
103 DiagnosticsEngine *Diags;
|
|
104 SourceLocation InputSourceLoc;
|
|
105 };
|
|
106
|
|
107 } // end anonymous namespace
|
|
108
|
|
109 bool Minimizer::reportError(const char *CurPtr, unsigned Err) {
|
|
110 if (!Diags)
|
|
111 return true;
|
|
112 assert(CurPtr >= Input.data() && "invalid buffer ptr");
|
|
113 Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err);
|
|
114 return true;
|
|
115 }
|
|
116
|
|
117 static void skipOverSpaces(const char *&First, const char *const End) {
|
|
118 while (First != End && isHorizontalWhitespace(*First))
|
|
119 ++First;
|
|
120 }
|
|
121
|
|
122 LLVM_NODISCARD static bool isRawStringLiteral(const char *First,
|
|
123 const char *Current) {
|
|
124 assert(First <= Current);
|
|
125
|
|
126 // Check if we can even back up.
|
|
127 if (*Current != '"' || First == Current)
|
|
128 return false;
|
|
129
|
|
130 // Check for an "R".
|
|
131 --Current;
|
|
132 if (*Current != 'R')
|
|
133 return false;
|
|
134 if (First == Current || !isIdentifierBody(*--Current))
|
|
135 return true;
|
|
136
|
|
137 // Check for a prefix of "u", "U", or "L".
|
|
138 if (*Current == 'u' || *Current == 'U' || *Current == 'L')
|
|
139 return First == Current || !isIdentifierBody(*--Current);
|
|
140
|
|
141 // Check for a prefix of "u8".
|
|
142 if (*Current != '8' || First == Current || *Current-- != 'u')
|
|
143 return false;
|
|
144 return First == Current || !isIdentifierBody(*--Current);
|
|
145 }
|
|
146
|
|
/// Skips a raw string literal.
///
/// \pre First points at the opening quote, and the character before it is 'R'.
///
/// On return First is just past the closing quote, or at \p End if the
/// literal is not terminated within the buffer.
static void skipRawString(const char *&First, const char *const End) {
  assert(First[0] == '"');
  assert(First[-1] == 'R');

  // The delimiter is whatever sits between the opening quote and the '('.
  const char *const DelimBegin = ++First;
  const char *Cursor = DelimBegin;
  while (Cursor != End && *Cursor != '(')
    ++Cursor;
  if (Cursor == End) {
    // No '(' at all: give up and consume the remainder of the buffer.
    First = End;
    return;
  }
  const size_t DelimLen = static_cast<size_t>(Cursor - DelimBegin);

  for (;;) {
    // Advance First to just past the next ')'.
    First = Cursor;
    while (First != End && *First != ')')
      ++First;
    if (First == End)
      return;
    ++First;

    // See how much of the delimiter follows the ')'.
    size_t Matched = 0;
    Cursor = First;
    while (Cursor != End && Matched < DelimLen &&
           DelimBegin[Matched] == *Cursor) {
      ++Cursor;
      ++Matched;
    }

    // Ran off the end of the buffer while matching.
    if (Cursor == End) {
      First = End;
      return;
    }

    // Full delimiter followed by a quote: that is the end of the literal.
    if (Matched == DelimLen && *Cursor == '"') {
      First = Cursor + 1;
      return;
    }
    // Otherwise keep searching from where the match attempt stopped.
  }
}
|
|
188
|
|
189 // Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n)
|
|
190 static unsigned isEOL(const char *First, const char *const End) {
|
|
191 if (First == End)
|
|
192 return 0;
|
|
193 if (End - First > 1 && isVerticalWhitespace(First[0]) &&
|
|
194 isVerticalWhitespace(First[1]) && First[0] != First[1])
|
|
195 return 2;
|
|
196 return !!isVerticalWhitespace(First[0]);
|
|
197 }
|
|
198
|
|
199 static void skipString(const char *&First, const char *const End) {
|
|
200 assert(*First == '\'' || *First == '"' || *First == '<');
|
|
201 const char Terminator = *First == '<' ? '>' : *First;
|
|
202 for (++First; First != End && *First != Terminator; ++First) {
|
|
203 // String and character literals don't extend past the end of the line.
|
|
204 if (isVerticalWhitespace(*First))
|
|
205 return;
|
|
206 if (*First != '\\')
|
|
207 continue;
|
|
208 // Skip past backslash to the next character. This ensures that the
|
|
209 // character right after it is skipped as well, which matters if it's
|
|
210 // the terminator.
|
|
211 if (++First == End)
|
|
212 return;
|
|
213 if (!isWhitespace(*First))
|
|
214 continue;
|
|
215 // Whitespace after the backslash might indicate a line continuation.
|
|
216 const char *FirstAfterBackslashPastSpace = First;
|
|
217 skipOverSpaces(FirstAfterBackslashPastSpace, End);
|
|
218 if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) {
|
|
219 // Advance the character pointer to the next line for the next
|
|
220 // iteration.
|
|
221 First = FirstAfterBackslashPastSpace + NLSize - 1;
|
|
222 }
|
|
223 }
|
|
224 if (First != End)
|
|
225 ++First; // Finish off the string.
|
|
226 }
|
|
227
|
|
228 // Returns the length of the skipped newline
|
|
229 static unsigned skipNewline(const char *&First, const char *End) {
|
|
230 if (First == End)
|
|
231 return 0;
|
|
232 assert(isVerticalWhitespace(*First));
|
|
233 unsigned Len = isEOL(First, End);
|
|
234 assert(Len && "expected newline");
|
|
235 First += Len;
|
|
236 return Len;
|
|
237 }
|
|
238
|
|
/// Tells whether the end-of-line sequence of length \p EOLLen that ends right
/// before \p First was immediately preceded by a backslash.
static bool wasLineContinuation(const char *First, unsigned EOLLen) {
  const char *const BeforeEOL = First - static_cast<int>(EOLLen) - 1;
  return *BeforeEOL == '\\';
}
|
|
242
|
|
243 static void skipToNewlineRaw(const char *&First, const char *const End) {
|
|
244 for (;;) {
|
|
245 if (First == End)
|
|
246 return;
|
|
247
|
|
248 unsigned Len = isEOL(First, End);
|
|
249 if (Len)
|
|
250 return;
|
|
251
|
|
252 do {
|
|
253 if (++First == End)
|
|
254 return;
|
|
255 Len = isEOL(First, End);
|
|
256 } while (!Len);
|
|
257
|
|
258 if (First[-1] != '\\')
|
|
259 return;
|
|
260
|
|
261 First += Len;
|
|
262 // Keep skipping lines...
|
|
263 }
|
|
264 }
|
|
265
|
|
266 static const char *findLastNonSpace(const char *First, const char *Last) {
|
|
267 assert(First <= Last);
|
|
268 while (First != Last && isHorizontalWhitespace(Last[-1]))
|
|
269 --Last;
|
|
270 return Last;
|
|
271 }
|
|
272
|
|
273 static const char *findFirstTrailingSpace(const char *First,
|
|
274 const char *Last) {
|
|
275 const char *LastNonSpace = findLastNonSpace(First, Last);
|
|
276 if (Last == LastNonSpace)
|
|
277 return Last;
|
|
278 assert(isHorizontalWhitespace(LastNonSpace[0]));
|
|
279 return LastNonSpace + 1;
|
|
280 }
|
|
281
|
|
282 static void skipLineComment(const char *&First, const char *const End) {
|
|
283 assert(First[0] == '/' && First[1] == '/');
|
|
284 First += 2;
|
|
285 skipToNewlineRaw(First, End);
|
|
286 }
|
|
287
|
|
/// Skips a "/* ... */" comment.
///
/// \pre First points at the opening "/*".
///
/// On return First is just past the closing "*/", or at \p End when the
/// comment is not terminated within the buffer.
static void skipBlockComment(const char *&First, const char *const End) {
  assert(First[0] == '/' && First[1] == '*');
  // The shortest complete comment, "/**/", is four characters long.
  if (End - First >= 4) {
    // Cursor is the candidate position of the closing '/'; starting at offset
    // three guarantees the '*' before it is not the one that opened the
    // comment.
    for (const char *Cursor = First + 3; Cursor != End; ++Cursor) {
      if (Cursor[-1] == '*' && Cursor[0] == '/') {
        First = Cursor + 1;
        return;
      }
    }
  }
  First = End;
}
|
|
300
|
|
301 /// \returns True if the current single quotation mark character is a C++ 14
|
|
302 /// digit separator.
|
|
303 static bool isQuoteCppDigitSeparator(const char *const Start,
|
|
304 const char *const Cur,
|
|
305 const char *const End) {
|
|
306 assert(*Cur == '\'' && "expected quotation character");
|
|
307 // skipLine called in places where we don't expect a valid number
|
|
308 // body before `start` on the same line, so always return false at the start.
|
|
309 if (Start == Cur)
|
|
310 return false;
|
|
311 // The previous character must be a valid PP number character.
|
|
312 // Make sure that the L, u, U, u8 prefixes don't get marked as a
|
|
313 // separator though.
|
|
314 char Prev = *(Cur - 1);
|
|
315 if (Prev == 'L' || Prev == 'U' || Prev == 'u')
|
|
316 return false;
|
|
317 if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u')
|
|
318 return false;
|
|
319 if (!isPreprocessingNumberBody(Prev))
|
|
320 return false;
|
|
321 // The next character should be a valid identifier body character.
|
|
322 return (Cur + 1) < End && isIdentifierBody(*(Cur + 1));
|
|
323 }
|
|
324
|
|
325 static void skipLine(const char *&First, const char *const End) {
|
|
326 for (;;) {
|
|
327 assert(First <= End);
|
|
328 if (First == End)
|
|
329 return;
|
|
330
|
|
331 if (isVerticalWhitespace(*First)) {
|
|
332 skipNewline(First, End);
|
|
333 return;
|
|
334 }
|
|
335 const char *Start = First;
|
|
336 while (First != End && !isVerticalWhitespace(*First)) {
|
|
337 // Iterate over strings correctly to avoid comments and newlines.
|
|
338 if (*First == '"' ||
|
|
339 (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) {
|
|
340 if (isRawStringLiteral(Start, First))
|
|
341 skipRawString(First, End);
|
|
342 else
|
|
343 skipString(First, End);
|
|
344 continue;
|
|
345 }
|
|
346
|
|
347 // Iterate over comments correctly.
|
|
348 if (*First != '/' || End - First < 2) {
|
|
349 ++First;
|
|
350 continue;
|
|
351 }
|
|
352
|
|
353 if (First[1] == '/') {
|
|
354 // "//...".
|
|
355 skipLineComment(First, End);
|
|
356 continue;
|
|
357 }
|
|
358
|
|
359 if (First[1] != '*') {
|
|
360 ++First;
|
|
361 continue;
|
|
362 }
|
|
363
|
|
364 // "/*...*/".
|
|
365 skipBlockComment(First, End);
|
|
366 }
|
|
367 if (First == End)
|
|
368 return;
|
|
369
|
|
370 // Skip over the newline.
|
|
371 unsigned Len = skipNewline(First, End);
|
|
372 if (!wasLineContinuation(First, Len)) // Continue past line-continuations.
|
|
373 break;
|
|
374 }
|
|
375 }
|
|
376
|
|
377 static void skipDirective(StringRef Name, const char *&First,
|
|
378 const char *const End) {
|
|
379 if (llvm::StringSwitch<bool>(Name)
|
|
380 .Case("warning", true)
|
|
381 .Case("error", true)
|
|
382 .Default(false))
|
|
383 // Do not process quotes or comments.
|
|
384 skipToNewlineRaw(First, End);
|
|
385 else
|
|
386 skipLine(First, End);
|
|
387 }
|
|
388
|
|
389 void Minimizer::printToNewline(const char *&First, const char *const End) {
|
|
390 while (First != End && !isVerticalWhitespace(*First)) {
|
|
391 const char *Last = First;
|
|
392 do {
|
|
393 // Iterate over strings correctly to avoid comments and newlines.
|
|
394 if (*Last == '"' || *Last == '\'' ||
|
|
395 (*Last == '<' && top() == pp_include)) {
|
|
396 if (LLVM_UNLIKELY(isRawStringLiteral(First, Last)))
|
|
397 skipRawString(Last, End);
|
|
398 else
|
|
399 skipString(Last, End);
|
|
400 continue;
|
|
401 }
|
|
402 if (*Last != '/' || End - Last < 2) {
|
|
403 ++Last;
|
|
404 continue; // Gather the rest up to print verbatim.
|
|
405 }
|
|
406
|
|
407 if (Last[1] != '/' && Last[1] != '*') {
|
|
408 ++Last;
|
|
409 continue;
|
|
410 }
|
|
411
|
|
412 // Deal with "//..." and "/*...*/".
|
|
413 append(First, findFirstTrailingSpace(First, Last));
|
|
414 First = Last;
|
|
415
|
|
416 if (Last[1] == '/') {
|
|
417 skipLineComment(First, End);
|
|
418 return;
|
|
419 }
|
|
420
|
|
421 put(' ');
|
|
422 skipBlockComment(First, End);
|
|
423 skipOverSpaces(First, End);
|
|
424 Last = First;
|
|
425 } while (Last != End && !isVerticalWhitespace(*Last));
|
|
426
|
|
427 // Print out the string.
|
|
428 const char *LastBeforeTrailingSpace = findLastNonSpace(First, Last);
|
|
429 if (Last == End || LastBeforeTrailingSpace == First ||
|
|
430 LastBeforeTrailingSpace[-1] != '\\') {
|
|
431 append(First, LastBeforeTrailingSpace);
|
|
432 First = Last;
|
|
433 skipNewline(First, End);
|
|
434 return;
|
|
435 }
|
|
436
|
|
437 // Print up to the backslash, backing up over spaces. Preserve at least one
|
|
438 // space, as the space matters when tokens are separated by a line
|
|
439 // continuation.
|
|
440 append(First, findFirstTrailingSpace(
|
|
441 First, LastBeforeTrailingSpace - 1));
|
|
442
|
|
443 First = Last;
|
|
444 skipNewline(First, End);
|
|
445 skipOverSpaces(First, End);
|
|
446 }
|
|
447 }
|
|
448
|
|
449 static void skipWhitespace(const char *&First, const char *const End) {
|
|
450 for (;;) {
|
|
451 assert(First <= End);
|
|
452 skipOverSpaces(First, End);
|
|
453
|
|
454 if (End - First < 2)
|
|
455 return;
|
|
456
|
|
457 if (First[0] == '\\' && isVerticalWhitespace(First[1])) {
|
|
458 skipNewline(++First, End);
|
|
459 continue;
|
|
460 }
|
|
461
|
|
462 // Check for a non-comment character.
|
|
463 if (First[0] != '/')
|
|
464 return;
|
|
465
|
|
466 // "// ...".
|
|
467 if (First[1] == '/') {
|
|
468 skipLineComment(First, End);
|
|
469 return;
|
|
470 }
|
|
471
|
|
472 // Cannot be a comment.
|
|
473 if (First[1] != '*')
|
|
474 return;
|
|
475
|
|
476 // "/*...*/".
|
|
477 skipBlockComment(First, End);
|
|
478 }
|
|
479 }
|
|
480
|
|
481 void Minimizer::printAdjacentModuleNameParts(const char *&First,
|
|
482 const char *const End) {
|
|
483 // Skip over parts of the body.
|
|
484 const char *Last = First;
|
|
485 do
|
|
486 ++Last;
|
|
487 while (Last != End && (isIdentifierBody(*Last) || *Last == '.'));
|
|
488 append(First, Last);
|
|
489 First = Last;
|
|
490 }
|
|
491
|
|
492 bool Minimizer::printAtImportBody(const char *&First, const char *const End) {
|
|
493 for (;;) {
|
|
494 skipWhitespace(First, End);
|
|
495 if (First == End)
|
|
496 return true;
|
|
497
|
|
498 if (isVerticalWhitespace(*First)) {
|
|
499 skipNewline(First, End);
|
|
500 continue;
|
|
501 }
|
|
502
|
|
503 // Found a semicolon.
|
|
504 if (*First == ';') {
|
|
505 put(*First++).put('\n');
|
|
506 return false;
|
|
507 }
|
|
508
|
|
509 // Don't handle macro expansions inside @import for now.
|
|
510 if (!isIdentifierBody(*First) && *First != '.')
|
|
511 return true;
|
|
512
|
|
513 printAdjacentModuleNameParts(First, End);
|
|
514 }
|
|
515 }
|
|
516
|
|
517 void Minimizer::printDirectiveBody(const char *&First, const char *const End) {
|
|
518 skipWhitespace(First, End); // Skip initial whitespace.
|
|
519 printToNewline(First, End);
|
|
520 while (Out.back() == ' ')
|
|
521 Out.pop_back();
|
|
522 put('\n');
|
|
523 }
|
|
524
|
|
525 LLVM_NODISCARD static const char *lexRawIdentifier(const char *First,
|
|
526 const char *const End) {
|
|
527 assert(isIdentifierBody(*First) && "invalid identifer");
|
|
528 const char *Last = First + 1;
|
|
529 while (Last != End && isIdentifierBody(*Last))
|
|
530 ++Last;
|
|
531 return Last;
|
|
532 }
|
|
533
|
|
534 LLVM_NODISCARD static const char *
|
|
535 getIdentifierContinuation(const char *First, const char *const End) {
|
|
536 if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1]))
|
|
537 return nullptr;
|
|
538
|
|
539 ++First;
|
|
540 skipNewline(First, End);
|
|
541 if (First == End)
|
|
542 return nullptr;
|
|
543 return isIdentifierBody(First[0]) ? First : nullptr;
|
|
544 }
|
|
545
|
|
546 Minimizer::IdInfo Minimizer::lexIdentifier(const char *First,
|
|
547 const char *const End) {
|
|
548 const char *Last = lexRawIdentifier(First, End);
|
|
549 const char *Next = getIdentifierContinuation(Last, End);
|
|
550 if (LLVM_LIKELY(!Next))
|
|
551 return IdInfo{Last, StringRef(First, Last - First)};
|
|
552
|
|
553 // Slow path, where identifiers are split over lines.
|
|
554 SmallVector<char, 64> Id(First, Last);
|
|
555 while (Next) {
|
|
556 Last = lexRawIdentifier(Next, End);
|
|
557 Id.append(Next, Last);
|
|
558 Next = getIdentifierContinuation(Last, End);
|
|
559 }
|
|
560 return IdInfo{
|
|
561 Last,
|
|
562 SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()};
|
|
563 }
|
|
564
|
|
565 void Minimizer::printAdjacentMacroArgs(const char *&First,
|
|
566 const char *const End) {
|
|
567 // Skip over parts of the body.
|
|
568 const char *Last = First;
|
|
569 do
|
|
570 ++Last;
|
|
571 while (Last != End &&
|
|
572 (isIdentifierBody(*Last) || *Last == '.' || *Last == ','));
|
|
573 append(First, Last);
|
|
574 First = Last;
|
|
575 }
|
|
576
|
|
577 bool Minimizer::printMacroArgs(const char *&First, const char *const End) {
|
|
578 assert(*First == '(');
|
|
579 put(*First++);
|
|
580 for (;;) {
|
|
581 skipWhitespace(First, End);
|
|
582 if (First == End)
|
|
583 return true;
|
|
584
|
|
585 if (*First == ')') {
|
|
586 put(*First++);
|
|
587 return false;
|
|
588 }
|
|
589
|
|
590 // This is intentionally fairly liberal.
|
|
591 if (!(isIdentifierBody(*First) || *First == '.' || *First == ','))
|
|
592 return true;
|
|
593
|
|
594 printAdjacentMacroArgs(First, End);
|
|
595 }
|
|
596 }
|
|
597
|
|
598 /// Looks for an identifier starting from Last.
|
|
599 ///
|
|
600 /// Updates "First" to just past the next identifier, if any. Returns true iff
|
|
601 /// the identifier matches "Id".
|
|
602 bool Minimizer::isNextIdentifier(StringRef Id, const char *&First,
|
|
603 const char *const End) {
|
|
604 skipWhitespace(First, End);
|
|
605 if (First == End || !isIdentifierHead(*First))
|
|
606 return false;
|
|
607
|
|
608 IdInfo FoundId = lexIdentifier(First, End);
|
|
609 First = FoundId.Last;
|
|
610 return FoundId.Name == Id;
|
|
611 }
|
|
612
|
|
613 bool Minimizer::lexAt(const char *&First, const char *const End) {
|
|
614 // Handle "@import".
|
|
615 const char *ImportLoc = First++;
|
|
616 if (!isNextIdentifier("import", First, End)) {
|
|
617 skipLine(First, End);
|
|
618 return false;
|
|
619 }
|
|
620 makeToken(decl_at_import);
|
|
621 append("@import ");
|
|
622 if (printAtImportBody(First, End))
|
|
623 return reportError(
|
|
624 ImportLoc, diag::err_dep_source_minimizer_missing_sema_after_at_import);
|
|
625 skipWhitespace(First, End);
|
|
626 if (First == End)
|
|
627 return false;
|
|
628 if (!isVerticalWhitespace(*First))
|
|
629 return reportError(
|
|
630 ImportLoc, diag::err_dep_source_minimizer_unexpected_tokens_at_import);
|
|
631 skipNewline(First, End);
|
|
632 return false;
|
|
633 }
|
|
634
|
|
635 bool Minimizer::lexModule(const char *&First, const char *const End) {
|
|
636 IdInfo Id = lexIdentifier(First, End);
|
|
637 First = Id.Last;
|
|
638 bool Export = false;
|
|
639 if (Id.Name == "export") {
|
|
640 Export = true;
|
|
641 skipWhitespace(First, End);
|
|
642 if (!isIdentifierBody(*First)) {
|
|
643 skipLine(First, End);
|
|
644 return false;
|
|
645 }
|
|
646 Id = lexIdentifier(First, End);
|
|
647 First = Id.Last;
|
|
648 }
|
|
649
|
|
650 if (Id.Name != "module" && Id.Name != "import") {
|
|
651 skipLine(First, End);
|
|
652 return false;
|
|
653 }
|
|
654
|
|
655 skipWhitespace(First, End);
|
|
656
|
|
657 // Ignore this as a module directive if the next character can't be part of
|
|
658 // an import.
|
|
659
|
|
660 switch (*First) {
|
|
661 case ':':
|
|
662 case '<':
|
|
663 case '"':
|
|
664 break;
|
|
665 default:
|
|
666 if (!isIdentifierBody(*First)) {
|
|
667 skipLine(First, End);
|
|
668 return false;
|
|
669 }
|
|
670 }
|
|
671
|
|
672 if (Export) {
|
|
673 makeToken(cxx_export_decl);
|
|
674 append("export ");
|
|
675 }
|
|
676
|
|
677 if (Id.Name == "module")
|
|
678 makeToken(cxx_module_decl);
|
|
679 else
|
|
680 makeToken(cxx_import_decl);
|
|
681 append(Id.Name);
|
|
682 append(" ");
|
|
683 printToNewline(First, End);
|
|
684 append("\n");
|
|
685 return false;
|
|
686 }
|
|
687
|
|
688 bool Minimizer::lexDefine(const char *&First, const char *const End) {
|
|
689 makeToken(pp_define);
|
|
690 append("#define ");
|
|
691 skipWhitespace(First, End);
|
|
692
|
|
693 if (!isIdentifierHead(*First))
|
|
694 return reportError(First, diag::err_pp_macro_not_identifier);
|
|
695
|
|
696 IdInfo Id = lexIdentifier(First, End);
|
|
697 const char *Last = Id.Last;
|
|
698 append(Id.Name);
|
|
699 if (Last == End)
|
|
700 return false;
|
|
701 if (*Last == '(') {
|
|
702 size_t Size = Out.size();
|
|
703 if (printMacroArgs(Last, End)) {
|
|
704 // Be robust to bad macro arguments, since they can show up in disabled
|
|
705 // code.
|
|
706 Out.resize(Size);
|
|
707 append("(/* invalid */\n");
|
|
708 skipLine(Last, End);
|
|
709 return false;
|
|
710 }
|
|
711 }
|
|
712 skipWhitespace(Last, End);
|
|
713 if (Last == End)
|
|
714 return false;
|
|
715 if (!isVerticalWhitespace(*Last))
|
|
716 put(' ');
|
|
717 printDirectiveBody(Last, End);
|
|
718 First = Last;
|
|
719 return false;
|
|
720 }
|
|
721
|
|
722 bool Minimizer::lexPragma(const char *&First, const char *const End) {
|
|
723 // #pragma.
|
|
724 skipWhitespace(First, End);
|
|
725 if (First == End || !isIdentifierHead(*First))
|
|
726 return false;
|
|
727
|
|
728 IdInfo FoundId = lexIdentifier(First, End);
|
|
729 First = FoundId.Last;
|
|
730 if (FoundId.Name == "once") {
|
|
731 // #pragma once
|
|
732 skipLine(First, End);
|
|
733 makeToken(pp_pragma_once);
|
|
734 append("#pragma once\n");
|
|
735 return false;
|
|
736 }
|
|
737
|
|
738 if (FoundId.Name != "clang") {
|
|
739 skipLine(First, End);
|
|
740 return false;
|
|
741 }
|
|
742
|
|
743 // #pragma clang.
|
|
744 if (!isNextIdentifier("module", First, End)) {
|
|
745 skipLine(First, End);
|
|
746 return false;
|
|
747 }
|
|
748
|
|
749 // #pragma clang module.
|
|
750 if (!isNextIdentifier("import", First, End)) {
|
|
751 skipLine(First, End);
|
|
752 return false;
|
|
753 }
|
|
754
|
|
755 // #pragma clang module import.
|
|
756 makeToken(pp_pragma_import);
|
|
757 append("#pragma clang module import ");
|
|
758 printDirectiveBody(First, End);
|
|
759 return false;
|
|
760 }
|
|
761
|
|
762 bool Minimizer::lexEndif(const char *&First, const char *const End) {
|
|
763 // Strip out "#else" if it's empty.
|
|
764 if (top() == pp_else)
|
|
765 popToken();
|
|
766
|
|
767 // If "#ifdef" is empty, strip it and skip the "#endif".
|
|
768 //
|
|
769 // FIXME: Once/if Clang starts disallowing __has_include in macro expansions,
|
|
770 // we can skip empty `#if` and `#elif` blocks as well after scanning for a
|
|
771 // literal __has_include in the condition. Even without that rule we could
|
|
772 // drop the tokens if we scan for identifiers in the condition and find none.
|
|
773 if (top() == pp_ifdef || top() == pp_ifndef) {
|
|
774 popToken();
|
|
775 skipLine(First, End);
|
|
776 return false;
|
|
777 }
|
|
778
|
|
779 return lexDefault(pp_endif, "endif", First, End);
|
|
780 }
|
|
781
|
|
782 bool Minimizer::lexDefault(TokenKind Kind, StringRef Directive,
|
|
783 const char *&First, const char *const End) {
|
|
784 makeToken(Kind);
|
|
785 put('#').append(Directive).put(' ');
|
|
786 printDirectiveBody(First, End);
|
|
787 return false;
|
|
788 }
|
|
789
|
|
/// Returns true if a line starting with \p First needs a closer look: '#',
/// '@', or one of the letters 'i', 'e' and 'm'.
static bool isStartOfRelevantLine(char First) {
  return First == '#' || First == '@' || First == 'i' || First == 'e' ||
         First == 'm';
}
|
|
801
|
|
802 bool Minimizer::lexPPLine(const char *&First, const char *const End) {
|
|
803 assert(First != End);
|
|
804
|
|
805 skipWhitespace(First, End);
|
|
806 assert(First <= End);
|
|
807 if (First == End)
|
|
808 return false;
|
|
809
|
|
810 if (!isStartOfRelevantLine(*First)) {
|
|
811 skipLine(First, End);
|
|
812 assert(First <= End);
|
|
813 return false;
|
|
814 }
|
|
815
|
|
816 // Handle "@import".
|
|
817 if (*First == '@')
|
|
818 return lexAt(First, End);
|
|
819
|
|
820 if (*First == 'i' || *First == 'e' || *First == 'm')
|
|
821 return lexModule(First, End);
|
|
822
|
|
823 // Handle preprocessing directives.
|
|
824 ++First; // Skip over '#'.
|
|
825 skipWhitespace(First, End);
|
|
826
|
|
827 if (First == End)
|
|
828 return reportError(First, diag::err_pp_expected_eol);
|
|
829
|
|
830 if (!isIdentifierHead(*First)) {
|
|
831 skipLine(First, End);
|
|
832 return false;
|
|
833 }
|
|
834
|
|
835 // Figure out the token.
|
|
836 IdInfo Id = lexIdentifier(First, End);
|
|
837 First = Id.Last;
|
|
838 auto Kind = llvm::StringSwitch<TokenKind>(Id.Name)
|
|
839 .Case("include", pp_include)
|
|
840 .Case("__include_macros", pp___include_macros)
|
|
841 .Case("define", pp_define)
|
|
842 .Case("undef", pp_undef)
|
|
843 .Case("import", pp_import)
|
|
844 .Case("include_next", pp_include_next)
|
|
845 .Case("if", pp_if)
|
|
846 .Case("ifdef", pp_ifdef)
|
|
847 .Case("ifndef", pp_ifndef)
|
|
848 .Case("elif", pp_elif)
|
|
849 .Case("else", pp_else)
|
|
850 .Case("endif", pp_endif)
|
|
851 .Case("pragma", pp_pragma_import)
|
|
852 .Default(pp_none);
|
|
853 if (Kind == pp_none) {
|
|
854 skipDirective(Id.Name, First, End);
|
|
855 return false;
|
|
856 }
|
|
857
|
|
858 if (Kind == pp_endif)
|
|
859 return lexEndif(First, End);
|
|
860
|
|
861 if (Kind == pp_define)
|
|
862 return lexDefine(First, End);
|
|
863
|
|
864 if (Kind == pp_pragma_import)
|
|
865 return lexPragma(First, End);
|
|
866
|
|
867 // Everything else.
|
|
868 return lexDefault(Kind, Id.Name, First, End);
|
|
869 }
|
|
870
|
|
/// Advances First past a UTF-8 byte order mark (the bytes EF BB BF) if the
/// buffer starts with one.
static void skipUTF8ByteOrderMark(const char *&First, const char *const End) {
  if (End - First < 3)
    return;
  const bool HasBOM =
      First[0] == '\xef' && First[1] == '\xbb' && First[2] == '\xbf';
  if (HasBOM)
    First += 3;
}
|
|
876
|
|
877 bool Minimizer::minimizeImpl(const char *First, const char *const End) {
|
|
878 skipUTF8ByteOrderMark(First, End);
|
|
879 while (First != End)
|
|
880 if (lexPPLine(First, End))
|
|
881 return true;
|
|
882 return false;
|
|
883 }
|
|
884
|
|
885 bool Minimizer::minimize() {
|
|
886 bool Error = minimizeImpl(Input.begin(), Input.end());
|
|
887
|
|
888 if (!Error) {
|
|
889 // Add a trailing newline and an EOF on success.
|
|
890 if (!Out.empty() && Out.back() != '\n')
|
|
891 Out.push_back('\n');
|
|
892 makeToken(pp_eof);
|
|
893 }
|
|
894
|
|
895 // Null-terminate the output. This way the memory buffer that's passed to
|
|
896 // Clang will not have to worry about the terminating '\0'.
|
|
897 Out.push_back(0);
|
|
898 Out.pop_back();
|
|
899 return Error;
|
|
900 }
|
|
901
|
|
902 bool clang::minimize_source_to_dependency_directives::computeSkippedRanges(
|
|
903 ArrayRef<Token> Input, llvm::SmallVectorImpl<SkippedRange> &Range) {
|
|
904 struct Directive {
|
|
905 enum DirectiveKind {
|
|
906 If, // if/ifdef/ifndef
|
|
907 Else // elif,else
|
|
908 };
|
|
909 int Offset;
|
|
910 DirectiveKind Kind;
|
|
911 };
|
|
912 llvm::SmallVector<Directive, 32> Offsets;
|
|
913 for (const Token &T : Input) {
|
|
914 switch (T.K) {
|
|
915 case pp_if:
|
|
916 case pp_ifdef:
|
|
917 case pp_ifndef:
|
|
918 Offsets.push_back({T.Offset, Directive::If});
|
|
919 break;
|
|
920
|
|
921 case pp_elif:
|
|
922 case pp_else: {
|
|
923 if (Offsets.empty())
|
|
924 return true;
|
|
925 int PreviousOffset = Offsets.back().Offset;
|
|
926 Range.push_back({PreviousOffset, T.Offset - PreviousOffset});
|
|
927 Offsets.push_back({T.Offset, Directive::Else});
|
|
928 break;
|
|
929 }
|
|
930
|
|
931 case pp_endif: {
|
|
932 if (Offsets.empty())
|
|
933 return true;
|
|
934 int PreviousOffset = Offsets.back().Offset;
|
|
935 Range.push_back({PreviousOffset, T.Offset - PreviousOffset});
|
|
936 do {
|
|
937 Directive::DirectiveKind Kind = Offsets.pop_back_val().Kind;
|
|
938 if (Kind == Directive::If)
|
|
939 break;
|
|
940 } while (!Offsets.empty());
|
|
941 break;
|
|
942 }
|
|
943 default:
|
|
944 break;
|
|
945 }
|
|
946 }
|
|
947 return false;
|
|
948 }
|
|
949
|
|
950 bool clang::minimizeSourceToDependencyDirectives(
|
|
951 StringRef Input, SmallVectorImpl<char> &Output,
|
|
952 SmallVectorImpl<Token> &Tokens, DiagnosticsEngine *Diags,
|
|
953 SourceLocation InputSourceLoc) {
|
|
954 Output.clear();
|
|
955 Tokens.clear();
|
|
956 return Minimizer(Output, Tokens, Input, Diags, InputSourceLoc).minimize();
|
|
957 }
|