Mercurial > hg > CbC > CbC_llvm
diff clang-tools-extra/clangd/SourceCode.cpp @ 221:79ff65ed7e25
LLVM12 Original
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 15 Jun 2021 19:15:29 +0900 |
parents | 0572611fdcc8 |
children | c4bab56944e8 |
line wrap: on
line diff
--- a/clang-tools-extra/clangd/SourceCode.cpp Tue Jun 15 19:13:43 2021 +0900 +++ b/clang-tools-extra/clangd/SourceCode.cpp Tue Jun 15 19:15:29 2021 +0900 @@ -8,10 +8,12 @@ #include "SourceCode.h" #include "FuzzyMatch.h" +#include "Preamble.h" #include "Protocol.h" #include "refactor/Tweak.h" #include "support/Context.h" #include "support/Logger.h" +#include "support/Threading.h" #include "clang/AST/ASTContext.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" @@ -54,8 +56,14 @@ // Iterates over unicode codepoints in the (UTF-8) string. For each, // invokes CB(UTF-8 length, UTF-16 length), and breaks if it returns true. // Returns true if CB returned true, false if we hit the end of string. +// +// If the string is not valid UTF-8, we log this error and "decode" the +// text in some arbitrary way. This is pretty sad, but this tends to happen deep +// within indexing of headers where clang misdetected the encoding, and +// propagating the error all the way back up is (probably?) not worth it. template <typename Callback> static bool iterateCodepoints(llvm::StringRef U8, const Callback &CB) { + bool LoggedInvalid = false; // A codepoint takes two UTF-16 code units if it's astral (outside BMP). // Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx. for (size_t I = 0; I < U8.size();) { @@ -69,9 +77,20 @@ // This convenient property of UTF-8 holds for all non-ASCII characters. size_t UTF8Length = llvm::countLeadingOnes(C); // 0xxx is ASCII, handled above. 10xxx is a trailing byte, invalid here. - // 11111xxx is not valid UTF-8 at all. Assert because it's probably our bug. - assert((UTF8Length >= 2 && UTF8Length <= 4) && - "Invalid UTF-8, or transcoding bug?"); + // 11111xxx is not valid UTF-8 at all, maybe some ISO-8859-*. 
+ if (LLVM_UNLIKELY(UTF8Length < 2 || UTF8Length > 4)) { + if (!LoggedInvalid) { + elog("File has invalid UTF-8 near offset {0}: {1}", I, llvm::toHex(U8)); + LoggedInvalid = true; + } + // We can't give a correct result, but avoid returning something wild. + // Pretend this is a valid ASCII byte, for lack of better options. + // (Too late to get ISO-8859-* right, we've skipped some bytes already). + if (CB(1, 1)) + return true; + ++I; + continue; + } I += UTF8Length; // Skip over all trailing bytes. // A codepoint takes two UTF-16 code units if it's astral (outside BMP). // Astral codepoints are encoded as 4 bytes in UTF-8 (11110xxx ...) @@ -156,20 +175,17 @@ llvm::Expected<size_t> positionToOffset(llvm::StringRef Code, Position P, bool AllowColumnsBeyondLineLength) { if (P.line < 0) - return llvm::make_error<llvm::StringError>( - llvm::formatv("Line value can't be negative ({0})", P.line), - llvm::errc::invalid_argument); + return error(llvm::errc::invalid_argument, + "Line value can't be negative ({0})", P.line); if (P.character < 0) - return llvm::make_error<llvm::StringError>( - llvm::formatv("Character value can't be negative ({0})", P.character), - llvm::errc::invalid_argument); + return error(llvm::errc::invalid_argument, + "Character value can't be negative ({0})", P.character); size_t StartOfLine = 0; for (int I = 0; I != P.line; ++I) { size_t NextNL = Code.find('\n', StartOfLine); if (NextNL == llvm::StringRef::npos) - return llvm::make_error<llvm::StringError>( - llvm::formatv("Line value is out of range ({0})", P.line), - llvm::errc::invalid_argument); + return error(llvm::errc::invalid_argument, + "Line value is out of range ({0})", P.line); StartOfLine = NextNL + 1; } StringRef Line = @@ -179,10 +195,9 @@ bool Valid; size_t ByteInLine = measureUnits(Line, P.character, lspEncoding(), Valid); if (!Valid && !AllowColumnsBeyondLineLength) - return llvm::make_error<llvm::StringError>( - llvm::formatv("{0} offset {1} is invalid for line {2}", lspEncoding(), 
- P.character, P.line), - llvm::errc::invalid_argument); + return error(llvm::errc::invalid_argument, + "{0} offset {1} is invalid for line {2}", lspEncoding(), + P.character, P.line); return StartOfLine + ByteInLine; } @@ -430,9 +445,8 @@ llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R) { assert(isValidFileRange(SM, R)); - bool Invalid = false; - auto *Buf = SM.getBuffer(SM.getFileID(R.getBegin()), &Invalid); - assert(!Invalid); + auto Buf = SM.getBufferOrNone(SM.getFileID(R.getBegin())); + assert(Buf); size_t BeginOffset = SM.getFileOffset(R.getBegin()); size_t EndOffset = SM.getFileOffset(R.getEnd()); @@ -441,7 +455,7 @@ llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM, Position P) { - llvm::StringRef Code = SM.getBuffer(SM.getMainFileID())->getBuffer(); + llvm::StringRef Code = SM.getBufferOrFake(SM.getMainFileID()).getBuffer(); auto Offset = positionToOffset(Code, P, /*AllowColumnBeyondLineLength=*/false); if (!Offset) @@ -558,9 +572,10 @@ format::FormatStyle getFormatStyleForFile(llvm::StringRef File, llvm::StringRef Content, - llvm::vfs::FileSystem *FS) { + const ThreadsafeFS &TFS) { auto Style = format::getStyle(format::DefaultFormatStyle, File, - format::DefaultFallbackStyle, Content, FS); + format::DefaultFallbackStyle, Content, + TFS.view(/*CWD=*/llvm::None).get()); if (!Style) { log("getStyle() failed for file {0}: {1}. Fallback is LLVM style.", File, Style.takeError()); @@ -584,7 +599,7 @@ Action) { // FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated! 
std::string NullTerminatedCode = Code.str(); - SourceManagerForFile FileSM("dummy.cpp", NullTerminatedCode); + SourceManagerForFile FileSM("mock_file_name.cpp", NullTerminatedCode); auto &SM = FileSM.get(); for (const auto &Tok : syntax::tokenize(SM.getMainFileID(), SM, LangOpts)) Action(Tok, SM); @@ -617,6 +632,12 @@ return Ranges; } +bool isKeyword(llvm::StringRef NewName, const LangOptions &LangOpts) { + // Keywords are initialized in constructor. + clang::IdentifierTable KeywordsTable(LangOpts); + return KeywordsTable.find(NewName) != KeywordsTable.end(); +} + namespace { struct NamespaceEvent { enum { @@ -758,8 +779,8 @@ } // Returns the prefix namespaces of NS: {"" ... NS}. -llvm::SmallVector<llvm::StringRef, 8> ancestorNamespaces(llvm::StringRef NS) { - llvm::SmallVector<llvm::StringRef, 8> Results; +llvm::SmallVector<llvm::StringRef> ancestorNamespaces(llvm::StringRef NS) { + llvm::SmallVector<llvm::StringRef> Results; Results.push_back(NS.take_front(0)); NS.split(Results, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false); for (llvm::StringRef &R : Results) @@ -954,15 +975,30 @@ if (!IdentifierInfo || !IdentifierInfo->hadMacroDefinition()) return None; - // Get the definition just before the searched location so that a macro - // referenced in a '#undef MACRO' can still be found. Note that we only do - // that if Loc is not pointing at start of file. - if (SM.getLocForStartOfFile(SM.getFileID(Loc)) != Loc) - Loc = Loc.getLocWithOffset(-1); - MacroDefinition MacroDef = PP.getMacroDefinitionAtLoc(IdentifierInfo, Loc); - if (auto *MI = MacroDef.getMacroInfo()) - return DefinedMacro{IdentifierInfo->getName(), MI}; - return None; + // We need to take special care to handle #define and #undef. + // Preprocessor::getMacroDefinitionAtLoc() only considers a macro + // definition to be in scope *after* the location of the macro name in a + // #define that introduces it, and *before* the location of the macro name + // in an #undef that undefines it. 
To handle these cases, we check for + // the macro being in scope either just after or just before the location + // of the token. In getting the location before, we also take care to check + // for start-of-file. + FileID FID = SM.getFileID(Loc); + assert(Loc != SM.getLocForEndOfFile(FID)); + SourceLocation JustAfterToken = Loc.getLocWithOffset(1); + auto *MacroInfo = + PP.getMacroDefinitionAtLoc(IdentifierInfo, JustAfterToken).getMacroInfo(); + if (!MacroInfo && SM.getLocForStartOfFile(FID) != Loc) { + SourceLocation JustBeforeToken = Loc.getLocWithOffset(-1); + MacroInfo = PP.getMacroDefinitionAtLoc(IdentifierInfo, JustBeforeToken) + .getMacroInfo(); + } + if (!MacroInfo) { + return None; + } + return DefinedMacro{ + IdentifierInfo->getName(), MacroInfo, + translatePreamblePatchLocation(MacroInfo->getDefinitionLoc(), SM)}; } llvm::Expected<std::string> Edit::apply() const { @@ -1017,6 +1053,49 @@ return llvm::Error::success(); } +llvm::Error applyChange(std::string &Contents, + const TextDocumentContentChangeEvent &Change) { + if (!Change.range) { + Contents = Change.text; + return llvm::Error::success(); + } + + const Position &Start = Change.range->start; + llvm::Expected<size_t> StartIndex = positionToOffset(Contents, Start, false); + if (!StartIndex) + return StartIndex.takeError(); + + const Position &End = Change.range->end; + llvm::Expected<size_t> EndIndex = positionToOffset(Contents, End, false); + if (!EndIndex) + return EndIndex.takeError(); + + if (*EndIndex < *StartIndex) + return error(llvm::errc::invalid_argument, + "Range's end position ({0}) is before start position ({1})", + End, Start); + + // Since the range length between two LSP positions is dependent on the + // contents of the buffer we compute the range length between the start and + // end position ourselves and compare it to the range length of the LSP + // message to verify the buffers of the client and server are in sync. 
+ + // EndIndex and StartIndex are in bytes, but Change.rangeLength is in UTF-16 + // code units. + ssize_t ComputedRangeLength = + lspLength(Contents.substr(*StartIndex, *EndIndex - *StartIndex)); + + if (Change.rangeLength && ComputedRangeLength != *Change.rangeLength) + return error(llvm::errc::invalid_argument, + "Change's rangeLength ({0}) doesn't match the " + "computed range length ({1}).", + *Change.rangeLength, ComputedRangeLength); + + Contents.replace(*StartIndex, *EndIndex - *StartIndex, Change.text); + + return llvm::Error::success(); +} + EligibleRegion getEligiblePoints(llvm::StringRef Code, llvm::StringRef FullyQualifiedName, const LangOptions &LangOpts) {