Mercurial > hg > CbC > CbC_llvm
comparison clang-tools-extra/clangd/SourceCode.cpp @ 221:79ff65ed7e25
LLVM12 Original
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 15 Jun 2021 19:15:29 +0900 |
parents | 0572611fdcc8 |
children | c4bab56944e8 |
comparison
equal
deleted
inserted
replaced
220:42394fc6a535 | 221:79ff65ed7e25 |
---|---|
6 // | 6 // |
7 //===----------------------------------------------------------------------===// | 7 //===----------------------------------------------------------------------===// |
8 #include "SourceCode.h" | 8 #include "SourceCode.h" |
9 | 9 |
10 #include "FuzzyMatch.h" | 10 #include "FuzzyMatch.h" |
11 #include "Preamble.h" | |
11 #include "Protocol.h" | 12 #include "Protocol.h" |
12 #include "refactor/Tweak.h" | 13 #include "refactor/Tweak.h" |
13 #include "support/Context.h" | 14 #include "support/Context.h" |
14 #include "support/Logger.h" | 15 #include "support/Logger.h" |
16 #include "support/Threading.h" | |
15 #include "clang/AST/ASTContext.h" | 17 #include "clang/AST/ASTContext.h" |
16 #include "clang/Basic/LangOptions.h" | 18 #include "clang/Basic/LangOptions.h" |
17 #include "clang/Basic/SourceLocation.h" | 19 #include "clang/Basic/SourceLocation.h" |
18 #include "clang/Basic/SourceManager.h" | 20 #include "clang/Basic/SourceManager.h" |
19 #include "clang/Basic/TokenKinds.h" | 21 #include "clang/Basic/TokenKinds.h" |
52 // Clangd uses UTF-8 and byte-offsets internally, so conversion is nontrivial. | 54 // Clangd uses UTF-8 and byte-offsets internally, so conversion is nontrivial. |
53 | 55 |
54 // Iterates over unicode codepoints in the (UTF-8) string. For each, | 56 // Iterates over unicode codepoints in the (UTF-8) string. For each, |
55 // invokes CB(UTF-8 length, UTF-16 length), and breaks if it returns true. | 57 // invokes CB(UTF-8 length, UTF-16 length), and breaks if it returns true. |
56 // Returns true if CB returned true, false if we hit the end of string. | 58 // Returns true if CB returned true, false if we hit the end of string. |
59 // | |
60 // If the string is not valid UTF-8, we log this error and "decode" the | |
61 // text in some arbitrary way. This is pretty sad, but this tends to happen deep | |
62 // within indexing of headers where clang misdetected the encoding, and | |
63 // propagating the error all the way back up is (probably?) not worth it. | |
57 template <typename Callback> | 64 template <typename Callback> |
58 static bool iterateCodepoints(llvm::StringRef U8, const Callback &CB) { | 65 static bool iterateCodepoints(llvm::StringRef U8, const Callback &CB) { |
66 bool LoggedInvalid = false; | |
59 // A codepoint takes two UTF-16 code units if it's astral (outside BMP). | 67 // A codepoint takes two UTF-16 code units if it's astral (outside BMP). |
60 // Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx. | 68 // Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx. |
61 for (size_t I = 0; I < U8.size();) { | 69 for (size_t I = 0; I < U8.size();) { |
62 unsigned char C = static_cast<unsigned char>(U8[I]); | 70 unsigned char C = static_cast<unsigned char>(U8[I]); |
63 if (LLVM_LIKELY(!(C & 0x80))) { // ASCII character. | 71 if (LLVM_LIKELY(!(C & 0x80))) { // ASCII character. |
67 continue; | 75 continue; |
68 } | 76 } |
69 // This convenient property of UTF-8 holds for all non-ASCII characters. | 77 // This convenient property of UTF-8 holds for all non-ASCII characters. |
70 size_t UTF8Length = llvm::countLeadingOnes(C); | 78 size_t UTF8Length = llvm::countLeadingOnes(C); |
71 // 0xxx is ASCII, handled above. 10xxx is a trailing byte, invalid here. | 79 // 0xxx is ASCII, handled above. 10xxx is a trailing byte, invalid here. |
72 // 11111xxx is not valid UTF-8 at all. Assert because it's probably our bug. | 80 // 11111xxx is not valid UTF-8 at all, maybe some ISO-8859-*. |
73 assert((UTF8Length >= 2 && UTF8Length <= 4) && | 81 if (LLVM_UNLIKELY(UTF8Length < 2 || UTF8Length > 4)) { |
74 "Invalid UTF-8, or transcoding bug?"); | 82 if (!LoggedInvalid) { |
83 elog("File has invalid UTF-8 near offset {0}: {1}", I, llvm::toHex(U8)); | |
84 LoggedInvalid = true; | |
85 } | |
86 // We can't give a correct result, but avoid returning something wild. | |
87 // Pretend this is a valid ASCII byte, for lack of better options. | |
88 // (Too late to get ISO-8859-* right, we've skipped some bytes already). | |
89 if (CB(1, 1)) | |
90 return true; | |
91 ++I; | |
92 continue; | |
93 } | |
75 I += UTF8Length; // Skip over all trailing bytes. | 94 I += UTF8Length; // Skip over all trailing bytes. |
76 // A codepoint takes two UTF-16 code units if it's astral (outside BMP). | 95 // A codepoint takes two UTF-16 code units if it's astral (outside BMP). |
77 // Astral codepoints are encoded as 4 bytes in UTF-8 (11110xxx ...) | 96 // Astral codepoints are encoded as 4 bytes in UTF-8 (11110xxx ...) |
78 if (CB(UTF8Length, UTF8Length == 4 ? 2 : 1)) | 97 if (CB(UTF8Length, UTF8Length == 4 ? 2 : 1)) |
79 return true; | 98 return true; |
154 } | 173 } |
155 | 174 |
156 llvm::Expected<size_t> positionToOffset(llvm::StringRef Code, Position P, | 175 llvm::Expected<size_t> positionToOffset(llvm::StringRef Code, Position P, |
157 bool AllowColumnsBeyondLineLength) { | 176 bool AllowColumnsBeyondLineLength) { |
158 if (P.line < 0) | 177 if (P.line < 0) |
159 return llvm::make_error<llvm::StringError>( | 178 return error(llvm::errc::invalid_argument, |
160 llvm::formatv("Line value can't be negative ({0})", P.line), | 179 "Line value can't be negative ({0})", P.line); |
161 llvm::errc::invalid_argument); | |
162 if (P.character < 0) | 180 if (P.character < 0) |
163 return llvm::make_error<llvm::StringError>( | 181 return error(llvm::errc::invalid_argument, |
164 llvm::formatv("Character value can't be negative ({0})", P.character), | 182 "Character value can't be negative ({0})", P.character); |
165 llvm::errc::invalid_argument); | |
166 size_t StartOfLine = 0; | 183 size_t StartOfLine = 0; |
167 for (int I = 0; I != P.line; ++I) { | 184 for (int I = 0; I != P.line; ++I) { |
168 size_t NextNL = Code.find('\n', StartOfLine); | 185 size_t NextNL = Code.find('\n', StartOfLine); |
169 if (NextNL == llvm::StringRef::npos) | 186 if (NextNL == llvm::StringRef::npos) |
170 return llvm::make_error<llvm::StringError>( | 187 return error(llvm::errc::invalid_argument, |
171 llvm::formatv("Line value is out of range ({0})", P.line), | 188 "Line value is out of range ({0})", P.line); |
172 llvm::errc::invalid_argument); | |
173 StartOfLine = NextNL + 1; | 189 StartOfLine = NextNL + 1; |
174 } | 190 } |
175 StringRef Line = | 191 StringRef Line = |
176 Code.substr(StartOfLine).take_until([](char C) { return C == '\n'; }); | 192 Code.substr(StartOfLine).take_until([](char C) { return C == '\n'; }); |
177 | 193 |
178 // P.character may be in UTF-16, transcode if necessary. | 194 // P.character may be in UTF-16, transcode if necessary. |
179 bool Valid; | 195 bool Valid; |
180 size_t ByteInLine = measureUnits(Line, P.character, lspEncoding(), Valid); | 196 size_t ByteInLine = measureUnits(Line, P.character, lspEncoding(), Valid); |
181 if (!Valid && !AllowColumnsBeyondLineLength) | 197 if (!Valid && !AllowColumnsBeyondLineLength) |
182 return llvm::make_error<llvm::StringError>( | 198 return error(llvm::errc::invalid_argument, |
183 llvm::formatv("{0} offset {1} is invalid for line {2}", lspEncoding(), | 199 "{0} offset {1} is invalid for line {2}", lspEncoding(), |
184 P.character, P.line), | 200 P.character, P.line); |
185 llvm::errc::invalid_argument); | |
186 return StartOfLine + ByteInLine; | 201 return StartOfLine + ByteInLine; |
187 } | 202 } |
188 | 203 |
189 Position offsetToPosition(llvm::StringRef Code, size_t Offset) { | 204 Position offsetToPosition(llvm::StringRef Code, size_t Offset) { |
190 Offset = std::min(Code.size(), Offset); | 205 Offset = std::min(Code.size(), Offset); |
428 return Result; | 443 return Result; |
429 } | 444 } |
430 | 445 |
431 llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R) { | 446 llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R) { |
432 assert(isValidFileRange(SM, R)); | 447 assert(isValidFileRange(SM, R)); |
433 bool Invalid = false; | 448 auto Buf = SM.getBufferOrNone(SM.getFileID(R.getBegin())); |
434 auto *Buf = SM.getBuffer(SM.getFileID(R.getBegin()), &Invalid); | 449 assert(Buf); |
435 assert(!Invalid); | |
436 | 450 |
437 size_t BeginOffset = SM.getFileOffset(R.getBegin()); | 451 size_t BeginOffset = SM.getFileOffset(R.getBegin()); |
438 size_t EndOffset = SM.getFileOffset(R.getEnd()); | 452 size_t EndOffset = SM.getFileOffset(R.getEnd()); |
439 return Buf->getBuffer().substr(BeginOffset, EndOffset - BeginOffset); | 453 return Buf->getBuffer().substr(BeginOffset, EndOffset - BeginOffset); |
440 } | 454 } |
441 | 455 |
442 llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM, | 456 llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM, |
443 Position P) { | 457 Position P) { |
444 llvm::StringRef Code = SM.getBuffer(SM.getMainFileID())->getBuffer(); | 458 llvm::StringRef Code = SM.getBufferOrFake(SM.getMainFileID()).getBuffer(); |
445 auto Offset = | 459 auto Offset = |
446 positionToOffset(Code, P, /*AllowColumnBeyondLineLength=*/false); | 460 positionToOffset(Code, P, /*AllowColumnBeyondLineLength=*/false); |
447 if (!Offset) | 461 if (!Offset) |
448 return Offset.takeError(); | 462 return Offset.takeError(); |
449 return SM.getLocForStartOfFile(SM.getMainFileID()).getLocWithOffset(*Offset); | 463 return SM.getLocForStartOfFile(SM.getMainFileID()).getLocWithOffset(*Offset); |
556 return digest(Content); | 570 return digest(Content); |
557 } | 571 } |
558 | 572 |
559 format::FormatStyle getFormatStyleForFile(llvm::StringRef File, | 573 format::FormatStyle getFormatStyleForFile(llvm::StringRef File, |
560 llvm::StringRef Content, | 574 llvm::StringRef Content, |
561 llvm::vfs::FileSystem *FS) { | 575 const ThreadsafeFS &TFS) { |
562 auto Style = format::getStyle(format::DefaultFormatStyle, File, | 576 auto Style = format::getStyle(format::DefaultFormatStyle, File, |
563 format::DefaultFallbackStyle, Content, FS); | 577 format::DefaultFallbackStyle, Content, |
578 TFS.view(/*CWD=*/llvm::None).get()); | |
564 if (!Style) { | 579 if (!Style) { |
565 log("getStyle() failed for file {0}: {1}. Fallback is LLVM style.", File, | 580 log("getStyle() failed for file {0}: {1}. Fallback is LLVM style.", File, |
566 Style.takeError()); | 581 Style.takeError()); |
567 return format::getLLVMStyle(); | 582 return format::getLLVMStyle(); |
568 } | 583 } |
582 lex(llvm::StringRef Code, const LangOptions &LangOpts, | 597 lex(llvm::StringRef Code, const LangOptions &LangOpts, |
583 llvm::function_ref<void(const syntax::Token &, const SourceManager &SM)> | 598 llvm::function_ref<void(const syntax::Token &, const SourceManager &SM)> |
584 Action) { | 599 Action) { |
585 // FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated! | 600 // FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated! |
586 std::string NullTerminatedCode = Code.str(); | 601 std::string NullTerminatedCode = Code.str(); |
587 SourceManagerForFile FileSM("dummy.cpp", NullTerminatedCode); | 602 SourceManagerForFile FileSM("mock_file_name.cpp", NullTerminatedCode); |
588 auto &SM = FileSM.get(); | 603 auto &SM = FileSM.get(); |
589 for (const auto &Tok : syntax::tokenize(SM.getMainFileID(), SM, LangOpts)) | 604 for (const auto &Tok : syntax::tokenize(SM.getMainFileID(), SM, LangOpts)) |
590 Action(Tok, SM); | 605 Action(Tok, SM); |
591 } | 606 } |
592 | 607 |
613 if (Tok.kind() != tok::identifier || Tok.text(SM) != Identifier) | 628 if (Tok.kind() != tok::identifier || Tok.text(SM) != Identifier) |
614 return; | 629 return; |
615 Ranges.push_back(halfOpenToRange(SM, Tok.range(SM).toCharRange(SM))); | 630 Ranges.push_back(halfOpenToRange(SM, Tok.range(SM).toCharRange(SM))); |
616 }); | 631 }); |
617 return Ranges; | 632 return Ranges; |
633 } | |
634 | |
635 bool isKeyword(llvm::StringRef NewName, const LangOptions &LangOpts) { | |
636 // Keywords are initialized in constructor. | |
637 clang::IdentifierTable KeywordsTable(LangOpts); | |
638 return KeywordsTable.find(NewName) != KeywordsTable.end(); | |
618 } | 639 } |
619 | 640 |
620 namespace { | 641 namespace { |
621 struct NamespaceEvent { | 642 struct NamespaceEvent { |
622 enum { | 643 enum { |
756 } | 777 } |
757 }); | 778 }); |
758 } | 779 } |
759 | 780 |
760 // Returns the prefix namespaces of NS: {"" ... NS}. | 781 // Returns the prefix namespaces of NS: {"" ... NS}. |
761 llvm::SmallVector<llvm::StringRef, 8> ancestorNamespaces(llvm::StringRef NS) { | 782 llvm::SmallVector<llvm::StringRef> ancestorNamespaces(llvm::StringRef NS) { |
762 llvm::SmallVector<llvm::StringRef, 8> Results; | 783 llvm::SmallVector<llvm::StringRef> Results; |
763 Results.push_back(NS.take_front(0)); | 784 Results.push_back(NS.take_front(0)); |
764 NS.split(Results, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false); | 785 NS.split(Results, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false); |
765 for (llvm::StringRef &R : Results) | 786 for (llvm::StringRef &R : Results) |
766 R = NS.take_front(R.end() - NS.begin()); | 787 R = NS.take_front(R.end() - NS.begin()); |
767 return Results; | 788 return Results; |
952 const auto &SM = PP.getSourceManager(); | 973 const auto &SM = PP.getSourceManager(); |
953 IdentifierInfo *IdentifierInfo = PP.getIdentifierInfo(SpelledTok.text(SM)); | 974 IdentifierInfo *IdentifierInfo = PP.getIdentifierInfo(SpelledTok.text(SM)); |
954 if (!IdentifierInfo || !IdentifierInfo->hadMacroDefinition()) | 975 if (!IdentifierInfo || !IdentifierInfo->hadMacroDefinition()) |
955 return None; | 976 return None; |
956 | 977 |
957 // Get the definition just before the searched location so that a macro | 978 // We need to take special care to handle #define and #undef. |
958 // referenced in a '#undef MACRO' can still be found. Note that we only do | 979 // Preprocessor::getMacroDefinitionAtLoc() only considers a macro |
959 // that if Loc is not pointing at start of file. | 980 // definition to be in scope *after* the location of the macro name in a |
960 if (SM.getLocForStartOfFile(SM.getFileID(Loc)) != Loc) | 981 // #define that introduces it, and *before* the location of the macro name |
961 Loc = Loc.getLocWithOffset(-1); | 982 // in an #undef that undefines it. To handle these cases, we check for |
962 MacroDefinition MacroDef = PP.getMacroDefinitionAtLoc(IdentifierInfo, Loc); | 983 // the macro being in scope either just after or just before the location |
963 if (auto *MI = MacroDef.getMacroInfo()) | 984 // of the token. In getting the location before, we also take care to check |
964 return DefinedMacro{IdentifierInfo->getName(), MI}; | 985 // for start-of-file. |
965 return None; | 986 FileID FID = SM.getFileID(Loc); |
987 assert(Loc != SM.getLocForEndOfFile(FID)); | |
988 SourceLocation JustAfterToken = Loc.getLocWithOffset(1); | |
989 auto *MacroInfo = | |
990 PP.getMacroDefinitionAtLoc(IdentifierInfo, JustAfterToken).getMacroInfo(); | |
991 if (!MacroInfo && SM.getLocForStartOfFile(FID) != Loc) { | |
992 SourceLocation JustBeforeToken = Loc.getLocWithOffset(-1); | |
993 MacroInfo = PP.getMacroDefinitionAtLoc(IdentifierInfo, JustBeforeToken) | |
994 .getMacroInfo(); | |
995 } | |
996 if (!MacroInfo) { | |
997 return None; | |
998 } | |
999 return DefinedMacro{ | |
1000 IdentifierInfo->getName(), MacroInfo, | |
1001 translatePreamblePatchLocation(MacroInfo->getDefinitionLoc(), SM)}; | |
966 } | 1002 } |
967 | 1003 |
968 llvm::Expected<std::string> Edit::apply() const { | 1004 llvm::Expected<std::string> Edit::apply() const { |
969 return tooling::applyAllReplacements(InitialCode, Replacements); | 1005 return tooling::applyAllReplacements(InitialCode, Replacements); |
970 } | 1006 } |
1012 llvm::Error reformatEdit(Edit &E, const format::FormatStyle &Style) { | 1048 llvm::Error reformatEdit(Edit &E, const format::FormatStyle &Style) { |
1013 if (auto NewEdits = cleanupAndFormat(E.InitialCode, E.Replacements, Style)) | 1049 if (auto NewEdits = cleanupAndFormat(E.InitialCode, E.Replacements, Style)) |
1014 E.Replacements = std::move(*NewEdits); | 1050 E.Replacements = std::move(*NewEdits); |
1015 else | 1051 else |
1016 return NewEdits.takeError(); | 1052 return NewEdits.takeError(); |
1053 return llvm::Error::success(); | |
1054 } | |
1055 | |
1056 llvm::Error applyChange(std::string &Contents, | |
1057 const TextDocumentContentChangeEvent &Change) { | |
1058 if (!Change.range) { | |
1059 Contents = Change.text; | |
1060 return llvm::Error::success(); | |
1061 } | |
1062 | |
1063 const Position &Start = Change.range->start; | |
1064 llvm::Expected<size_t> StartIndex = positionToOffset(Contents, Start, false); | |
1065 if (!StartIndex) | |
1066 return StartIndex.takeError(); | |
1067 | |
1068 const Position &End = Change.range->end; | |
1069 llvm::Expected<size_t> EndIndex = positionToOffset(Contents, End, false); | |
1070 if (!EndIndex) | |
1071 return EndIndex.takeError(); | |
1072 | |
1073 if (*EndIndex < *StartIndex) | |
1074 return error(llvm::errc::invalid_argument, | |
1075 "Range's end position ({0}) is before start position ({1})", | |
1076 End, Start); | |
1077 | |
1078 // Since the range length between two LSP positions is dependent on the | |
1079 // contents of the buffer we compute the range length between the start and | |
1080 // end position ourselves and compare it to the range length of the LSP | |
1081 // message to verify the buffers of the client and server are in sync. | |
1082 | |
1083 // EndIndex and StartIndex are in bytes, but Change.rangeLength is in UTF-16 | |
1084 // code units. | |
1085 ssize_t ComputedRangeLength = | |
1086 lspLength(Contents.substr(*StartIndex, *EndIndex - *StartIndex)); | |
1087 | |
1088 if (Change.rangeLength && ComputedRangeLength != *Change.rangeLength) | |
1089 return error(llvm::errc::invalid_argument, | |
1090 "Change's rangeLength ({0}) doesn't match the " | |
1091 "computed range length ({1}).", | |
1092 *Change.rangeLength, ComputedRangeLength); | |
1093 | |
1094 Contents.replace(*StartIndex, *EndIndex - *StartIndex, Change.text); | |
1095 | |
1017 return llvm::Error::success(); | 1096 return llvm::Error::success(); |
1018 } | 1097 } |
1019 | 1098 |
1020 EligibleRegion getEligiblePoints(llvm::StringRef Code, | 1099 EligibleRegion getEligiblePoints(llvm::StringRef Code, |
1021 llvm::StringRef FullyQualifiedName, | 1100 llvm::StringRef FullyQualifiedName, |