150
|
1 //===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===//
|
|
2 //
|
|
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
4 // See https://llvm.org/LICENSE.txt for license information.
|
|
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
6 //
|
|
7 //===----------------------------------------------------------------------===//
|
|
8 ///
|
|
9 /// \file
|
|
10 /// WhitespaceManager class manages whitespace around tokens and their
|
|
11 /// replacements.
|
|
12 ///
|
|
13 //===----------------------------------------------------------------------===//
|
|
14
|
|
15 #ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
|
|
16 #define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
|
|
17
|
|
18 #include "TokenAnnotator.h"
|
|
19 #include "clang/Basic/SourceManager.h"
|
|
20 #include "clang/Format/Format.h"
|
223
|
21 #include "llvm/ADT/SmallVector.h"
|
|
22 #include <algorithm>
|
150
|
23 #include <string>
|
173
|
24 #include <tuple>
|
150
|
25
|
|
26 namespace clang {
|
|
27 namespace format {
|
|
28
|
|
29 /// Manages the whitespaces around tokens and their replacements.
|
|
30 ///
|
|
31 /// This includes special handling for certain constructs, e.g. the alignment of
|
|
32 /// trailing line comments.
|
|
33 ///
|
|
34 /// To guarantee correctness of alignment operations, the \c WhitespaceManager
|
|
35 /// must be informed about every token in the source file; for each token, there
|
|
36 /// must be exactly one call to either \c replaceWhitespace or
|
|
37 /// \c addUntouchableToken.
|
|
38 ///
|
|
39 /// There may be multiple calls to \c breakToken for a given token.
|
|
40 class WhitespaceManager {
|
|
41 public:
|
|
42 WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style,
|
|
43 bool UseCRLF)
|
|
44 : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {}
|
|
45
|
|
46 bool useCRLF() const { return UseCRLF; }
|
|
47
|
|
48 /// Replaces the whitespace in front of \p Tok. Only call once for
|
|
49 /// each \c AnnotatedToken.
|
|
50 ///
|
|
51 /// \p StartOfTokenColumn is the column at which the token will start after
|
|
52 /// this replacement. It is needed for determining how \p Spaces is turned
|
|
53 /// into tabs and spaces for some format styles.
|
|
54 void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces,
|
173
|
55 unsigned StartOfTokenColumn, bool isAligned = false,
|
150
|
56 bool InPPDirective = false);
|
|
57
|
|
58 /// Adds information about an unchangeable token's whitespace.
|
|
59 ///
|
|
60 /// Needs to be called for every token for which \c replaceWhitespace
|
|
61 /// was not called.
|
|
62 void addUntouchableToken(const FormatToken &Tok, bool InPPDirective);
|
|
63
|
|
64 llvm::Error addReplacement(const tooling::Replacement &Replacement);
|
|
65
|
|
66 /// Inserts or replaces whitespace in the middle of a token.
|
|
67 ///
|
|
68 /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix
|
|
69 /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars
|
|
70 /// characters.
|
|
71 ///
|
|
72 /// Note: \p Spaces can be negative to retain information about initial
|
|
73 /// relative column offset between a line of a block comment and the start of
|
|
74 /// the comment. This negative offset may be compensated by trailing comment
|
|
75 /// alignment here. In all other cases negative \p Spaces will be truncated to
|
|
76 /// 0.
|
|
77 ///
|
|
78 /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is
|
|
79 /// used to align backslashes correctly.
|
|
80 void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset,
|
|
81 unsigned ReplaceChars,
|
|
82 StringRef PreviousPostfix,
|
|
83 StringRef CurrentPrefix, bool InPPDirective,
|
|
84 unsigned Newlines, int Spaces);
|
|
85
|
|
86 /// Returns all the \c Replacements created during formatting.
|
|
87 const tooling::Replacements &generateReplacements();
|
|
88
|
|
89 /// Represents a change before a token, a break inside a token,
|
|
90 /// or the layout of an unchanged token (or whitespace within).
|
|
91 struct Change {
|
|
92 /// Functor to sort changes in original source order.
|
|
93 class IsBeforeInFile {
|
|
94 public:
|
|
95 IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
|
|
96 bool operator()(const Change &C1, const Change &C2) const;
|
|
97
|
|
98 private:
|
|
99 const SourceManager &SourceMgr;
|
|
100 };
|
|
101
|
|
102 /// Creates a \c Change.
|
|
103 ///
|
|
104 /// The generated \c Change will replace the characters at
|
|
105 /// \p OriginalWhitespaceRange with a concatenation of
|
|
106 /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces
|
|
107 /// and \p CurrentLinePrefix.
|
|
108 ///
|
|
109 /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out
|
|
110 /// trailing comments and escaped newlines.
|
|
111 Change(const FormatToken &Tok, bool CreateReplacement,
|
|
112 SourceRange OriginalWhitespaceRange, int Spaces,
|
|
113 unsigned StartOfTokenColumn, unsigned NewlinesBefore,
|
|
114 StringRef PreviousLinePostfix, StringRef CurrentLinePrefix,
|
173
|
115 bool IsAligned, bool ContinuesPPDirective, bool IsInsideToken);
|
150
|
116
|
|
117 // The kind of the token whose whitespace this change replaces, or in which
|
|
118 // this change inserts whitespace.
|
|
119 // FIXME: Currently this is not set correctly for breaks inside comments, as
|
|
120 // the \c BreakableToken is still doing its own alignment.
|
|
121 const FormatToken *Tok;
|
|
122
|
|
123 bool CreateReplacement;
|
|
124 // Changes might be in the middle of a token, so we cannot just keep the
|
|
125 // FormatToken around to query its information.
|
|
126 SourceRange OriginalWhitespaceRange;
|
|
127 unsigned StartOfTokenColumn;
|
|
128 unsigned NewlinesBefore;
|
|
129 std::string PreviousLinePostfix;
|
|
130 std::string CurrentLinePrefix;
|
173
|
131 bool IsAligned;
|
150
|
132 bool ContinuesPPDirective;
|
|
133
|
|
134 // The number of spaces in front of the token or broken part of the token.
|
|
135 // This will be adapted when aligning tokens.
|
|
136 // Can be negative to retain information about the initial relative offset
|
|
137 // of the lines in a block comment. This is used when aligning trailing
|
|
138 // comments. Uncompensated negative offset is truncated to 0.
|
|
139 int Spaces;
|
|
140
|
|
141 // If this change is inside of a token but not at the start of the token or
|
|
142 // directly after a newline.
|
|
143 bool IsInsideToken;
|
|
144
|
|
145 // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and
|
|
146 // \c EscapedNewlineColumn will be calculated in
|
|
147 // \c calculateLineBreakInformation.
|
|
148 bool IsTrailingComment;
|
|
149 unsigned TokenLength;
|
|
150 unsigned PreviousEndOfTokenColumn;
|
|
151 unsigned EscapedNewlineColumn;
|
|
152
|
|
153 // These fields are used to retain correct relative line indentation in a
|
|
154 // block comment when aligning trailing comments.
|
|
155 //
|
|
156 // If this Change represents a continuation of a block comment,
|
|
157 // \c StartOfBlockComment is pointer to the first Change in the block
|
|
158 // comment. \c IndentationOffset is a relative column offset to this
|
|
159 // change, so that the correct column can be reconstructed at the end of
|
|
160 // the alignment process.
|
|
161 const Change *StartOfBlockComment;
|
|
162 int IndentationOffset;
|
|
163
|
173
|
164 // Depth of conditionals. Computed from tracking fake parenthesis, except
|
|
165 // it does not increase the indent for "chained" conditionals.
|
|
166 int ConditionalsLevel;
|
|
167
|
|
168 // A combination of indent, nesting and conditionals levels, which are used
|
|
169 // in tandem to compute lexical scope, for the purposes of deciding
|
150
|
170 // when to stop consecutive alignment runs.
|
173
|
171 std::tuple<unsigned, unsigned, unsigned> indentAndNestingLevel() const {
|
|
172 return std::make_tuple(Tok->IndentLevel, Tok->NestingLevel,
|
|
173 ConditionalsLevel);
|
150
|
174 }
|
|
175 };
|
|
176
|
|
177 private:
|
223
|
178 struct CellDescription {
|
|
179 unsigned Index = 0;
|
|
180 unsigned Cell = 0;
|
|
181 unsigned EndIndex = 0;
|
|
182 bool HasSplit = false;
|
|
183 CellDescription *NextColumnElement = nullptr;
|
|
184
|
|
185 constexpr bool operator==(const CellDescription &Other) const {
|
|
186 return Index == Other.Index && Cell == Other.Cell &&
|
|
187 EndIndex == Other.EndIndex;
|
|
188 }
|
|
189 constexpr bool operator!=(const CellDescription &Other) const {
|
|
190 return !(*this == Other);
|
|
191 }
|
|
192 };
|
|
193
|
|
194 struct CellDescriptions {
|
|
195 SmallVector<CellDescription> Cells;
|
|
196 unsigned CellCount = 0;
|
|
197 unsigned InitialSpaces = 0;
|
|
198 };
|
|
199
|
150
|
200 /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens
|
|
201 /// or token parts in a line and \c PreviousEndOfTokenColumn and
|
|
202 /// \c EscapedNewlineColumn for the first tokens or token parts in a line.
|
|
203 void calculateLineBreakInformation();
|
|
204
|
|
205 /// \brief Align consecutive C/C++ preprocessor macros over all \c Changes.
|
|
206 void alignConsecutiveMacros();
|
|
207
|
|
208 /// Align consecutive assignments over all \c Changes.
|
|
209 void alignConsecutiveAssignments();
|
|
210
|
173
|
211 /// Align consecutive bitfields over all \c Changes.
|
|
212 void alignConsecutiveBitFields();
|
|
213
|
150
|
214 /// Align consecutive declarations over all \c Changes.
|
|
215 void alignConsecutiveDeclarations();
|
|
216
|
173
|
217 /// Align consecutive declarations over all \c Changes.
|
|
218 void alignChainedConditionals();
|
|
219
|
150
|
220 /// Align trailing comments over all \c Changes.
|
|
221 void alignTrailingComments();
|
|
222
|
|
223 /// Align trailing comments from change \p Start to change \p End at
|
|
224 /// the specified \p Column.
|
|
225 void alignTrailingComments(unsigned Start, unsigned End, unsigned Column);
|
|
226
|
|
227 /// Align escaped newlines over all \c Changes.
|
|
228 void alignEscapedNewlines();
|
|
229
|
|
230 /// Align escaped newlines from change \p Start to change \p End at
|
|
231 /// the specified \p Column.
|
|
232 void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column);
|
|
233
|
223
|
234 /// Align Array Initializers over all \c Changes.
|
|
235 void alignArrayInitializers();
|
|
236
|
|
237 /// Align Array Initializers from change \p Start to change \p End at
|
|
238 /// the specified \p Column.
|
|
239 void alignArrayInitializers(unsigned Start, unsigned End);
|
|
240
|
|
241 /// Align Array Initializers being careful to right justify the columns
|
|
242 /// as described by \p CellDescs.
|
|
243 void alignArrayInitializersRightJustified(CellDescriptions &&CellDescs);
|
|
244
|
|
245 /// Align Array Initializers being careful to leftt justify the columns
|
|
246 /// as described by \p CellDescs.
|
|
247 void alignArrayInitializersLeftJustified(CellDescriptions &&CellDescs);
|
|
248
|
|
249 /// Calculate the cell width between two indexes.
|
|
250 unsigned calculateCellWidth(unsigned Start, unsigned End,
|
|
251 bool WithSpaces = false) const;
|
|
252
|
|
253 /// Get a set of fully specified CellDescriptions between \p Start and
|
|
254 /// \p End of the change list.
|
|
255 CellDescriptions getCells(unsigned Start, unsigned End);
|
|
256
|
|
257 /// Does this \p Cell contain a split element?
|
|
258 static bool isSplitCell(const CellDescription &Cell);
|
|
259
|
|
260 /// Get the width of the preceeding cells from \p Start to \p End.
|
|
261 template <typename I>
|
|
262 auto getNetWidth(const I &Start, const I &End, unsigned InitialSpaces) const {
|
|
263 auto NetWidth = InitialSpaces;
|
|
264 for (auto PrevIter = Start; PrevIter != End; ++PrevIter) {
|
|
265 // If we broke the line the initial spaces are already
|
|
266 // accounted for.
|
|
267 if (Changes[PrevIter->Index].NewlinesBefore > 0)
|
|
268 NetWidth = 0;
|
|
269 NetWidth +=
|
|
270 calculateCellWidth(PrevIter->Index, PrevIter->EndIndex, true) + 1;
|
|
271 }
|
|
272 return NetWidth;
|
|
273 }
|
|
274
|
|
275 /// Get the maximum width of a cell in a sequence of columns.
|
|
276 template <typename I>
|
|
277 unsigned getMaximumCellWidth(I CellIter, unsigned NetWidth) const {
|
|
278 unsigned CellWidth =
|
|
279 calculateCellWidth(CellIter->Index, CellIter->EndIndex, true);
|
|
280 if (Changes[CellIter->Index].NewlinesBefore == 0)
|
|
281 CellWidth += NetWidth;
|
|
282 for (const auto *Next = CellIter->NextColumnElement; Next != nullptr;
|
|
283 Next = Next->NextColumnElement) {
|
|
284 auto ThisWidth = calculateCellWidth(Next->Index, Next->EndIndex, true);
|
|
285 if (Changes[Next->Index].NewlinesBefore == 0)
|
|
286 ThisWidth += NetWidth;
|
|
287 CellWidth = std::max(CellWidth, ThisWidth);
|
|
288 }
|
|
289 return CellWidth;
|
|
290 }
|
|
291
|
|
292 /// Get The maximum width of all columns to a given cell.
|
|
293 template <typename I>
|
|
294 unsigned getMaximumNetWidth(const I &CellStart, const I &CellStop,
|
|
295 unsigned InitialSpaces,
|
|
296 unsigned CellCount) const {
|
|
297 auto MaxNetWidth = getNetWidth(CellStart, CellStop, InitialSpaces);
|
|
298 auto RowCount = 1U;
|
|
299 auto Offset = std::distance(CellStart, CellStop);
|
|
300 for (const auto *Next = CellStop->NextColumnElement; Next != nullptr;
|
|
301 Next = Next->NextColumnElement) {
|
|
302 auto Start = (CellStart + RowCount * CellCount);
|
|
303 auto End = Start + Offset;
|
|
304 MaxNetWidth =
|
|
305 std::max(MaxNetWidth, getNetWidth(Start, End, InitialSpaces));
|
|
306 ++RowCount;
|
|
307 }
|
|
308 return MaxNetWidth;
|
|
309 }
|
|
310
|
|
311 /// Align a split cell with a newline to the first element in the cell.
|
|
312 void alignToStartOfCell(unsigned Start, unsigned End);
|
|
313
|
|
314 /// Link the Cell pointers in the list of Cells.
|
|
315 static CellDescriptions linkCells(CellDescriptions &&CellDesc);
|
|
316
|
150
|
317 /// Fill \c Replaces with the replacements for all effective changes.
|
|
318 void generateChanges();
|
|
319
|
|
320 /// Stores \p Text as the replacement for the whitespace in \p Range.
|
|
321 void storeReplacement(SourceRange Range, StringRef Text);
|
|
322 void appendNewlineText(std::string &Text, unsigned Newlines);
|
|
323 void appendEscapedNewlineText(std::string &Text, unsigned Newlines,
|
|
324 unsigned PreviousEndOfTokenColumn,
|
|
325 unsigned EscapedNewlineColumn);
|
|
326 void appendIndentText(std::string &Text, unsigned IndentLevel,
|
173
|
327 unsigned Spaces, unsigned WhitespaceStartColumn,
|
|
328 bool IsAligned);
|
|
329 unsigned appendTabIndent(std::string &Text, unsigned Spaces,
|
|
330 unsigned Indentation);
|
150
|
331
|
|
332 SmallVector<Change, 16> Changes;
|
|
333 const SourceManager &SourceMgr;
|
|
334 tooling::Replacements Replaces;
|
|
335 const FormatStyle &Style;
|
|
336 bool UseCRLF;
|
|
337 };
|
|
338
|
|
339 } // namespace format
|
|
340 } // namespace clang
|
|
341
|
|
342 #endif
|