150
|
1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
|
|
2 //
|
|
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
4 // See https://llvm.org/LICENSE.txt for license information.
|
|
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
6 //
|
|
7 //===----------------------------------------------------------------------===//
|
|
8 ///
|
|
9 /// \file
|
|
10 /// This file implements an indenter that manages the indentation of
|
|
11 /// continuations.
|
|
12 ///
|
|
13 //===----------------------------------------------------------------------===//
|
|
14
|
|
15 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
|
|
16 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
|
|
17
|
|
18 #include "Encoding.h"
|
|
19 #include "FormatToken.h"
|
|
20 #include "clang/Format/Format.h"
|
|
21 #include "llvm/Support/Regex.h"
|
|
22 #include <map>
|
|
23 #include <tuple>
|
|
24
|
|
25 namespace clang {
|
|
26 class SourceManager;
|
|
27
|
|
28 namespace format {
|
|
29
|
|
30 class AnnotatedLine;
|
|
31 class BreakableToken;
|
|
32 struct FormatToken;
|
|
33 struct LineState;
|
|
34 struct ParenState;
|
|
35 struct RawStringFormatStyleManager;
|
|
36 class WhitespaceManager;
|
|
37
|
|
38 struct RawStringFormatStyleManager {
|
|
39 llvm::StringMap<FormatStyle> DelimiterStyle;
|
|
40 llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
|
|
41
|
|
42 RawStringFormatStyleManager(const FormatStyle &CodeStyle);
|
|
43
|
|
44 llvm::Optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
|
|
45
|
|
46 llvm::Optional<FormatStyle>
|
|
47 getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
|
|
48 };
|
|
49
|
|
50 class ContinuationIndenter {
|
|
51 public:
|
|
52 /// Constructs a \c ContinuationIndenter to format \p Line starting in
|
|
53 /// column \p FirstIndent.
|
|
54 ContinuationIndenter(const FormatStyle &Style,
|
|
55 const AdditionalKeywords &Keywords,
|
|
56 const SourceManager &SourceMgr,
|
|
57 WhitespaceManager &Whitespaces,
|
|
58 encoding::Encoding Encoding,
|
|
59 bool BinPackInconclusiveFunctions);
|
|
60
|
|
61 /// Get the initial state, i.e. the state after placing \p Line's
|
|
62 /// first token at \p FirstIndent. When reformatting a fragment of code, as in
|
|
63 /// the case of formatting inside raw string literals, \p FirstStartColumn is
|
|
64 /// the column at which the state of the parent formatter is.
|
|
65 LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
|
|
66 const AnnotatedLine *Line, bool DryRun);
|
|
67
|
|
68 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
|
|
69 // better home.
|
|
70 /// Returns \c true, if a line break after \p State is allowed.
|
|
71 bool canBreak(const LineState &State);
|
|
72
|
|
73 /// Returns \c true, if a line break after \p State is mandatory.
|
|
74 bool mustBreak(const LineState &State);
|
|
75
|
|
76 /// Appends the next token to \p State and updates information
|
|
77 /// necessary for indentation.
|
|
78 ///
|
|
79 /// Puts the token on the current line if \p Newline is \c false and adds a
|
|
80 /// line break and necessary indentation otherwise.
|
|
81 ///
|
|
82 /// If \p DryRun is \c false, also creates and stores the required
|
|
83 /// \c Replacement.
|
|
84 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
|
|
85 unsigned ExtraSpaces = 0);
|
|
86
|
|
87 /// Get the column limit for this line. This is the style's column
|
|
88 /// limit, potentially reduced for preprocessor definitions.
|
|
89 unsigned getColumnLimit(const LineState &State) const;
|
|
90
|
|
91 private:
|
|
92 /// Mark the next token as consumed in \p State and modify its stacks
|
|
93 /// accordingly.
|
|
94 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
|
|
95
|
|
96 /// Update 'State' according to the next token's fake left parentheses.
|
|
97 void moveStatePastFakeLParens(LineState &State, bool Newline);
|
|
98 /// Update 'State' according to the next token's fake r_parens.
|
|
99 void moveStatePastFakeRParens(LineState &State);
|
|
100
|
|
101 /// Update 'State' according to the next token being one of "(<{[".
|
|
102 void moveStatePastScopeOpener(LineState &State, bool Newline);
|
|
103 /// Update 'State' according to the next token being one of ")>}]".
|
|
104 void moveStatePastScopeCloser(LineState &State);
|
|
105 /// Update 'State' with the next token opening a nested block.
|
|
106 void moveStateToNewBlock(LineState &State);
|
|
107
|
|
108 /// Reformats a raw string literal.
|
|
109 ///
|
|
110 /// \returns An extra penalty induced by reformatting the token.
|
|
111 unsigned reformatRawStringLiteral(const FormatToken &Current,
|
|
112 LineState &State,
|
|
113 const FormatStyle &RawStringStyle,
|
|
114 bool DryRun, bool Newline);
|
|
115
|
|
116 /// If the current token is at the end of the current line, handle
|
|
117 /// the transition to the next line.
|
|
118 unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
|
|
119 bool DryRun, bool AllowBreak, bool Newline);
|
|
120
|
|
121 /// If \p Current is a raw string that is configured to be reformatted,
|
|
122 /// return the style to be used.
|
|
123 llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
|
|
124 const LineState &State);
|
|
125
|
|
126 /// If the current token sticks out over the end of the line, break
|
|
127 /// it if possible.
|
|
128 ///
|
|
129 /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
|
|
130 /// when tokens are broken or lines exceed the column limit, and exceeded
|
|
131 /// indicates whether the algorithm purposefully left lines exceeding the
|
|
132 /// column limit.
|
|
133 ///
|
|
134 /// The returned penalty will cover the cost of the additional line breaks
|
|
135 /// and column limit violation in all lines except for the last one. The
|
|
136 /// penalty for the column limit violation in the last line (and in single
|
|
137 /// line tokens) is handled in \c addNextStateToQueue.
|
|
138 ///
|
|
139 /// \p Strict indicates whether reflowing is allowed to leave characters
|
|
140 /// protruding the column limit; if true, lines will be split strictly within
|
|
141 /// the column limit where possible; if false, words are allowed to protrude
|
|
142 /// over the column limit as long as the penalty is less than the penalty
|
|
143 /// of a break.
|
|
144 std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
|
|
145 LineState &State,
|
|
146 bool AllowBreak, bool DryRun,
|
|
147 bool Strict);
|
|
148
|
|
149 /// Returns the \c BreakableToken starting at \p Current, or nullptr
|
|
150 /// if the current token cannot be broken.
|
|
151 std::unique_ptr<BreakableToken>
|
|
152 createBreakableToken(const FormatToken &Current, LineState &State,
|
|
153 bool AllowBreak);
|
|
154
|
|
155 /// Appends the next token to \p State and updates information
|
|
156 /// necessary for indentation.
|
|
157 ///
|
|
158 /// Puts the token on the current line.
|
|
159 ///
|
|
160 /// If \p DryRun is \c false, also creates and stores the required
|
|
161 /// \c Replacement.
|
|
162 void addTokenOnCurrentLine(LineState &State, bool DryRun,
|
|
163 unsigned ExtraSpaces);
|
|
164
|
|
165 /// Appends the next token to \p State and updates information
|
|
166 /// necessary for indentation.
|
|
167 ///
|
|
168 /// Adds a line break and necessary indentation.
|
|
169 ///
|
|
170 /// If \p DryRun is \c false, also creates and stores the required
|
|
171 /// \c Replacement.
|
|
172 unsigned addTokenOnNewLine(LineState &State, bool DryRun);
|
|
173
|
|
174 /// Calculate the new column for a line wrap before the next token.
|
|
175 unsigned getNewLineColumn(const LineState &State);
|
|
176
|
|
177 /// Adds a multiline token to the \p State.
|
|
178 ///
|
|
179 /// \returns Extra penalty for the first line of the literal: last line is
|
|
180 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
|
|
181 /// matter, as we don't change them.
|
|
182 unsigned addMultilineToken(const FormatToken &Current, LineState &State);
|
|
183
|
|
184 /// Returns \c true if the next token starts a multiline string
|
|
185 /// literal.
|
|
186 ///
|
|
187 /// This includes implicitly concatenated strings, strings that will be broken
|
|
188 /// by clang-format and string literals with escaped newlines.
|
|
189 bool nextIsMultilineString(const LineState &State);
|
|
190
|
|
191 FormatStyle Style;
|
|
192 const AdditionalKeywords &Keywords;
|
|
193 const SourceManager &SourceMgr;
|
|
194 WhitespaceManager &Whitespaces;
|
|
195 encoding::Encoding Encoding;
|
|
196 bool BinPackInconclusiveFunctions;
|
|
197 llvm::Regex CommentPragmasRegex;
|
|
198 const RawStringFormatStyleManager RawStringFormats;
|
|
199 };
|
|
200
|
|
201 struct ParenState {
|
|
202 ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
|
|
203 bool AvoidBinPacking, bool NoLineBreak)
|
|
204 : Tok(Tok), Indent(Indent), LastSpace(LastSpace),
|
173
|
205 NestedBlockIndent(Indent), IsAligned(false),
|
|
206 BreakBeforeClosingBrace(false), AvoidBinPacking(AvoidBinPacking),
|
|
207 BreakBeforeParameter(false), NoLineBreak(NoLineBreak),
|
|
208 NoLineBreakInOperand(false), LastOperatorWrapped(true),
|
|
209 ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
|
|
210 AlignColons(true), ObjCSelectorNameFound(false),
|
|
211 HasMultipleNestedBlocks(false), NestedBlockInlined(false),
|
|
212 IsInsideObjCArrayLiteral(false), IsCSharpGenericTypeConstraint(false),
|
|
213 IsChainedConditional(false), IsWrappedConditional(false),
|
|
214 UnindentOperator(false) {}
|
150
|
215
|
|
216 /// \brief The token opening this parenthesis level, or nullptr if this level
|
|
217 /// is opened by fake parenthesis.
|
|
218 ///
|
|
219 /// Not considered for memoization as it will always have the same value at
|
|
220 /// the same token.
|
|
221 const FormatToken *Tok;
|
|
222
|
|
223 /// The position to which a specific parenthesis level needs to be
|
|
224 /// indented.
|
|
225 unsigned Indent;
|
|
226
|
|
227 /// The position of the last space on each level.
|
|
228 ///
|
|
229 /// Used e.g. to break like:
|
|
230 /// functionCall(Parameter, otherCall(
|
|
231 /// OtherParameter));
|
|
232 unsigned LastSpace;
|
|
233
|
|
234 /// If a block relative to this parenthesis level gets wrapped, indent
|
|
235 /// it this much.
|
|
236 unsigned NestedBlockIndent;
|
|
237
|
|
238 /// The position the first "<<" operator encountered on each level.
|
|
239 ///
|
|
240 /// Used to align "<<" operators. 0 if no such operator has been encountered
|
|
241 /// on a level.
|
|
242 unsigned FirstLessLess = 0;
|
|
243
|
|
244 /// The column of a \c ? in a conditional expression;
|
|
245 unsigned QuestionColumn = 0;
|
|
246
|
|
247 /// The position of the colon in an ObjC method declaration/call.
|
|
248 unsigned ColonPos = 0;
|
|
249
|
|
250 /// The start of the most recent function in a builder-type call.
|
|
251 unsigned StartOfFunctionCall = 0;
|
|
252
|
|
253 /// Contains the start of array subscript expressions, so that they
|
|
254 /// can be aligned.
|
|
255 unsigned StartOfArraySubscripts = 0;
|
|
256
|
|
257 /// If a nested name specifier was broken over multiple lines, this
|
|
258 /// contains the start column of the second line. Otherwise 0.
|
|
259 unsigned NestedNameSpecifierContinuation = 0;
|
|
260
|
|
261 /// If a call expression was broken over multiple lines, this
|
|
262 /// contains the start column of the second line. Otherwise 0.
|
|
263 unsigned CallContinuation = 0;
|
|
264
|
|
265 /// The column of the first variable name in a variable declaration.
|
|
266 ///
|
|
267 /// Used to align further variables if necessary.
|
|
268 unsigned VariablePos = 0;
|
|
269
|
173
|
270 /// Whether this block's indentation is used for alignment.
|
|
271 bool IsAligned : 1;
|
|
272
|
150
|
273 /// Whether a newline needs to be inserted before the block's closing
|
|
274 /// brace.
|
|
275 ///
|
|
276 /// We only want to insert a newline before the closing brace if there also
|
|
277 /// was a newline after the beginning left brace.
|
|
278 bool BreakBeforeClosingBrace : 1;
|
|
279
|
|
280 /// Avoid bin packing, i.e. multiple parameters/elements on multiple
|
|
281 /// lines, in this context.
|
|
282 bool AvoidBinPacking : 1;
|
|
283
|
|
284 /// Break after the next comma (or all the commas in this context if
|
|
285 /// \c AvoidBinPacking is \c true).
|
|
286 bool BreakBeforeParameter : 1;
|
|
287
|
|
288 /// Line breaking in this context would break a formatting rule.
|
|
289 bool NoLineBreak : 1;
|
|
290
|
|
291 /// Same as \c NoLineBreak, but is restricted until the end of the
|
|
292 /// operand (including the next ",").
|
|
293 bool NoLineBreakInOperand : 1;
|
|
294
|
|
295 /// True if the last binary operator on this level was wrapped to the
|
|
296 /// next line.
|
|
297 bool LastOperatorWrapped : 1;
|
|
298
|
|
299 /// \c true if this \c ParenState already contains a line-break.
|
|
300 ///
|
|
301 /// The first line break in a certain \c ParenState causes extra penalty so
|
|
302 /// that clang-format prefers similar breaks, i.e. breaks in the same
|
|
303 /// parenthesis.
|
|
304 bool ContainsLineBreak : 1;
|
|
305
|
|
306 /// \c true if this \c ParenState contains multiple segments of a
|
|
307 /// builder-type call on one line.
|
|
308 bool ContainsUnwrappedBuilder : 1;
|
|
309
|
|
310 /// \c true if the colons of the curren ObjC method expression should
|
|
311 /// be aligned.
|
|
312 ///
|
|
313 /// Not considered for memoization as it will always have the same value at
|
|
314 /// the same token.
|
|
315 bool AlignColons : 1;
|
|
316
|
|
317 /// \c true if at least one selector name was found in the current
|
|
318 /// ObjC method expression.
|
|
319 ///
|
|
320 /// Not considered for memoization as it will always have the same value at
|
|
321 /// the same token.
|
|
322 bool ObjCSelectorNameFound : 1;
|
|
323
|
|
324 /// \c true if there are multiple nested blocks inside these parens.
|
|
325 ///
|
|
326 /// Not considered for memoization as it will always have the same value at
|
|
327 /// the same token.
|
|
328 bool HasMultipleNestedBlocks : 1;
|
|
329
|
|
330 /// The start of a nested block (e.g. lambda introducer in C++ or
|
|
331 /// "function" in JavaScript) is not wrapped to a new line.
|
|
332 bool NestedBlockInlined : 1;
|
|
333
|
|
334 /// \c true if the current \c ParenState represents an Objective-C
|
|
335 /// array literal.
|
|
336 bool IsInsideObjCArrayLiteral : 1;
|
|
337
|
173
|
338 bool IsCSharpGenericTypeConstraint : 1;
|
|
339
|
|
340 /// \brief true if the current \c ParenState represents the false branch of
|
|
341 /// a chained conditional expression (e.g. else-if)
|
|
342 bool IsChainedConditional : 1;
|
|
343
|
|
344 /// \brief true if there conditionnal was wrapped on the first operator (the
|
|
345 /// question mark)
|
|
346 bool IsWrappedConditional : 1;
|
|
347
|
|
348 /// \brief Indicates the indent should be reduced by the length of the
|
|
349 /// operator.
|
|
350 bool UnindentOperator : 1;
|
|
351
|
150
|
352 bool operator<(const ParenState &Other) const {
|
|
353 if (Indent != Other.Indent)
|
|
354 return Indent < Other.Indent;
|
|
355 if (LastSpace != Other.LastSpace)
|
|
356 return LastSpace < Other.LastSpace;
|
|
357 if (NestedBlockIndent != Other.NestedBlockIndent)
|
|
358 return NestedBlockIndent < Other.NestedBlockIndent;
|
|
359 if (FirstLessLess != Other.FirstLessLess)
|
|
360 return FirstLessLess < Other.FirstLessLess;
|
173
|
361 if (IsAligned != Other.IsAligned)
|
|
362 return IsAligned;
|
150
|
363 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
|
|
364 return BreakBeforeClosingBrace;
|
|
365 if (QuestionColumn != Other.QuestionColumn)
|
|
366 return QuestionColumn < Other.QuestionColumn;
|
|
367 if (AvoidBinPacking != Other.AvoidBinPacking)
|
|
368 return AvoidBinPacking;
|
|
369 if (BreakBeforeParameter != Other.BreakBeforeParameter)
|
|
370 return BreakBeforeParameter;
|
|
371 if (NoLineBreak != Other.NoLineBreak)
|
|
372 return NoLineBreak;
|
|
373 if (LastOperatorWrapped != Other.LastOperatorWrapped)
|
|
374 return LastOperatorWrapped;
|
|
375 if (ColonPos != Other.ColonPos)
|
|
376 return ColonPos < Other.ColonPos;
|
|
377 if (StartOfFunctionCall != Other.StartOfFunctionCall)
|
|
378 return StartOfFunctionCall < Other.StartOfFunctionCall;
|
|
379 if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
|
|
380 return StartOfArraySubscripts < Other.StartOfArraySubscripts;
|
|
381 if (CallContinuation != Other.CallContinuation)
|
|
382 return CallContinuation < Other.CallContinuation;
|
|
383 if (VariablePos != Other.VariablePos)
|
|
384 return VariablePos < Other.VariablePos;
|
|
385 if (ContainsLineBreak != Other.ContainsLineBreak)
|
|
386 return ContainsLineBreak;
|
|
387 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
|
|
388 return ContainsUnwrappedBuilder;
|
|
389 if (NestedBlockInlined != Other.NestedBlockInlined)
|
|
390 return NestedBlockInlined;
|
173
|
391 if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint)
|
|
392 return IsCSharpGenericTypeConstraint;
|
|
393 if (IsChainedConditional != Other.IsChainedConditional)
|
|
394 return IsChainedConditional;
|
|
395 if (IsWrappedConditional != Other.IsWrappedConditional)
|
|
396 return IsWrappedConditional;
|
|
397 if (UnindentOperator != Other.UnindentOperator)
|
|
398 return UnindentOperator;
|
150
|
399 return false;
|
|
400 }
|
|
401 };
|
|
402
|
|
403 /// The current state when indenting a unwrapped line.
|
|
404 ///
|
|
405 /// As the indenting tries different combinations this is copied by value.
|
|
406 struct LineState {
|
|
407 /// The number of used columns in the current line.
|
|
408 unsigned Column;
|
|
409
|
|
410 /// The token that needs to be next formatted.
|
|
411 FormatToken *NextToken;
|
|
412
|
|
413 /// \c true if this line contains a continued for-loop section.
|
|
414 bool LineContainsContinuedForLoopSection;
|
|
415
|
|
416 /// \c true if \p NextToken should not continue this line.
|
|
417 bool NoContinuation;
|
|
418
|
|
419 /// The \c NestingLevel at the start of this line.
|
|
420 unsigned StartOfLineLevel;
|
|
421
|
|
422 /// The lowest \c NestingLevel on the current line.
|
|
423 unsigned LowestLevelOnLine;
|
|
424
|
|
425 /// The start column of the string literal, if we're in a string
|
|
426 /// literal sequence, 0 otherwise.
|
|
427 unsigned StartOfStringLiteral;
|
|
428
|
|
429 /// A stack keeping track of properties applying to parenthesis
|
|
430 /// levels.
|
|
431 std::vector<ParenState> Stack;
|
|
432
|
|
433 /// Ignore the stack of \c ParenStates for state comparison.
|
|
434 ///
|
|
435 /// In long and deeply nested unwrapped lines, the current algorithm can
|
|
436 /// be insufficient for finding the best formatting with a reasonable amount
|
|
437 /// of time and memory. Setting this flag will effectively lead to the
|
|
438 /// algorithm not analyzing some combinations. However, these combinations
|
|
439 /// rarely contain the optimal solution: In short, accepting a higher
|
|
440 /// penalty early would need to lead to different values in the \c
|
|
441 /// ParenState stack (in an otherwise identical state) and these different
|
|
442 /// values would need to lead to a significant amount of avoided penalty
|
|
443 /// later.
|
|
444 ///
|
|
445 /// FIXME: Come up with a better algorithm instead.
|
|
446 bool IgnoreStackForComparison;
|
|
447
|
|
448 /// The indent of the first token.
|
|
449 unsigned FirstIndent;
|
|
450
|
|
451 /// The line that is being formatted.
|
|
452 ///
|
|
453 /// Does not need to be considered for memoization because it doesn't change.
|
|
454 const AnnotatedLine *Line;
|
|
455
|
|
456 /// Comparison operator to be able to used \c LineState in \c map.
|
|
457 bool operator<(const LineState &Other) const {
|
|
458 if (NextToken != Other.NextToken)
|
|
459 return NextToken < Other.NextToken;
|
|
460 if (Column != Other.Column)
|
|
461 return Column < Other.Column;
|
|
462 if (LineContainsContinuedForLoopSection !=
|
|
463 Other.LineContainsContinuedForLoopSection)
|
|
464 return LineContainsContinuedForLoopSection;
|
|
465 if (NoContinuation != Other.NoContinuation)
|
|
466 return NoContinuation;
|
|
467 if (StartOfLineLevel != Other.StartOfLineLevel)
|
|
468 return StartOfLineLevel < Other.StartOfLineLevel;
|
|
469 if (LowestLevelOnLine != Other.LowestLevelOnLine)
|
|
470 return LowestLevelOnLine < Other.LowestLevelOnLine;
|
|
471 if (StartOfStringLiteral != Other.StartOfStringLiteral)
|
|
472 return StartOfStringLiteral < Other.StartOfStringLiteral;
|
|
473 if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
|
|
474 return false;
|
|
475 return Stack < Other.Stack;
|
|
476 }
|
|
477 };
|
|
478
|
|
479 } // end namespace format
|
|
480 } // end namespace clang
|
|
481
|
|
482 #endif
|