CbC/CbC_llvm: clang-tools-extra/clangd/SourceCode.cpp comparison

comparison clang-tools-extra/clangd/SourceCode.cpp @ 221:79ff65ed7e25

LLVM12 Original

author	Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date	Tue, 15 Jun 2021 19:15:29 +0900
parents	0572611fdcc8
children	c4bab56944e8

comparison

equal deleted inserted replaced

-:42394fc6a535
+:79ff65ed7e25
 //
 //===----------------------------------------------------------------------===//
 #include "SourceCode.h"
 #include "FuzzyMatch.h"
+#include "Preamble.h"
 #include "Protocol.h"
 #include "refactor/Tweak.h"
 #include "support/Context.h"
 #include "support/Logger.h"
+#include "support/Threading.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TokenKinds.h"
 // Clangd uses UTF-8 and byte-offsets internally, so conversion is nontrivial.
 // Iterates over unicode codepoints in the (UTF-8) string. For each,
 // invokes CB(UTF-8 length, UTF-16 length), and breaks if it returns true.
 // Returns true if CB returned true, false if we hit the end of string.
+//
+// If the string is not valid UTF-8, we log this error and "decode" the
+// text in some arbitrary way. This is pretty sad, but this tends to happen deep
+// within indexing of headers where clang misdetected the encoding, and
+// propagating the error all the way back up is (probably?) not worth it.
 template <typename Callback>
 static bool iterateCodepoints(llvm::StringRef U8, const Callback &CB) {
+bool LoggedInvalid = false;
 // A codepoint takes two UTF-16 code units if it's astral (outside BMP).
 // Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx.
 for (size_t I = 0; I < U8.size();) {
 unsigned char C = static_cast<unsigned char>(U8[I]);
 if (LLVM_LIKELY(!(C & 0x80))) { // ASCII character.
 continue;
 }
 // This convenient property of UTF-8 holds for all non-ASCII characters.
 size_t UTF8Length = llvm::countLeadingOnes(C);
 // 0xxx is ASCII, handled above. 10xxx is a trailing byte, invalid here.
-// 11111xxx is not valid UTF-8 at all. Assert because it's probably our bug.
+// 11111xxx is not valid UTF-8 at all, maybe some ISO-8859-*.
-assert((UTF8Length >= 2 && UTF8Length <= 4) &&
+if (LLVM_UNLIKELY(UTF8Length < 2 || UTF8Length > 4)) {
-"Invalid UTF-8, or transcoding bug?");
+if (!LoggedInvalid) {
+elog("File has invalid UTF-8 near offset {0}: {1}", I, llvm::toHex(U8));
+LoggedInvalid = true;
+}
+// We can't give a correct result, but avoid returning something wild.
+// Pretend this is a valid ASCII byte, for lack of better options.
+// (Too late to get ISO-8859-* right, we've skipped some bytes already).
+if (CB(1, 1))
+return true;
+++I;
+continue;
+}
 I += UTF8Length; // Skip over all trailing bytes.
 // A codepoint takes two UTF-16 code units if it's astral (outside BMP).
 // Astral codepoints are encoded as 4 bytes in UTF-8 (11110xxx ...)
 if (CB(UTF8Length, UTF8Length == 4 ? 2 : 1))
 return true;
 }
 llvm::Expected<size_t> positionToOffset(llvm::StringRef Code, Position P,
 bool AllowColumnsBeyondLineLength) {
 if (P.line < 0)
-return llvm::make_error<llvm::StringError>(
+return error(llvm::errc::invalid_argument,
-llvm::formatv("Line value can't be negative ({0})", P.line),
+"Line value can't be negative ({0})", P.line);
-llvm::errc::invalid_argument);
 if (P.character < 0)
-return llvm::make_error<llvm::StringError>(
+return error(llvm::errc::invalid_argument,
-llvm::formatv("Character value can't be negative ({0})", P.character),
+"Character value can't be negative ({0})", P.character);
-llvm::errc::invalid_argument);
 size_t StartOfLine = 0;
 for (int I = 0; I != P.line; ++I) {
 size_t NextNL = Code.find('\n', StartOfLine);
 if (NextNL == llvm::StringRef::npos)
-return llvm::make_error<llvm::StringError>(
+return error(llvm::errc::invalid_argument,
-llvm::formatv("Line value is out of range ({0})", P.line),
+"Line value is out of range ({0})", P.line);
-llvm::errc::invalid_argument);
 StartOfLine = NextNL + 1;
 }
 StringRef Line =
 Code.substr(StartOfLine).take_until([](char C) { return C == '\n'; });
 // P.character may be in UTF-16, transcode if necessary.
 bool Valid;
 size_t ByteInLine = measureUnits(Line, P.character, lspEncoding(), Valid);
 if (!Valid && !AllowColumnsBeyondLineLength)
-return llvm::make_error<llvm::StringError>(
+return error(llvm::errc::invalid_argument,
-llvm::formatv("{0} offset {1} is invalid for line {2}", lspEncoding(),
+"{0} offset {1} is invalid for line {2}", lspEncoding(),
-P.character, P.line),
+P.character, P.line);
-llvm::errc::invalid_argument);
 return StartOfLine + ByteInLine;
 }
 Position offsetToPosition(llvm::StringRef Code, size_t Offset) {
 Offset = std::min(Code.size(), Offset);
 return Result;
 }
 llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R) {
 assert(isValidFileRange(SM, R));
-bool Invalid = false;
+auto Buf = SM.getBufferOrNone(SM.getFileID(R.getBegin()));
-auto *Buf = SM.getBuffer(SM.getFileID(R.getBegin()), &Invalid);
+assert(Buf);
-assert(!Invalid);
 size_t BeginOffset = SM.getFileOffset(R.getBegin());
 size_t EndOffset = SM.getFileOffset(R.getEnd());
 return Buf->getBuffer().substr(BeginOffset, EndOffset - BeginOffset);
 }
 llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM,
 Position P) {
-llvm::StringRef Code = SM.getBuffer(SM.getMainFileID())->getBuffer();
+llvm::StringRef Code = SM.getBufferOrFake(SM.getMainFileID()).getBuffer();
 auto Offset =
 positionToOffset(Code, P, /*AllowColumnBeyondLineLength=*/false);
 if (!Offset)
 return Offset.takeError();
 return SM.getLocForStartOfFile(SM.getMainFileID()).getLocWithOffset(*Offset);
 return digest(Content);
 }
 format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
 llvm::StringRef Content,
-llvm::vfs::FileSystem *FS) {
+const ThreadsafeFS &TFS) {
 auto Style = format::getStyle(format::DefaultFormatStyle, File,
-format::DefaultFallbackStyle, Content, FS);
+format::DefaultFallbackStyle, Content,
+TFS.view(/*CWD=*/llvm::None).get());
 if (!Style) {
 log("getStyle() failed for file {0}: {1}. Fallback is LLVM style.", File,
 Style.takeError());
 return format::getLLVMStyle();
 }
 lex(llvm::StringRef Code, const LangOptions &LangOpts,
 llvm::function_ref<void(const syntax::Token &, const SourceManager &SM)>
 Action) {
 // FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated!
 std::string NullTerminatedCode = Code.str();
-SourceManagerForFile FileSM("dummy.cpp", NullTerminatedCode);
+SourceManagerForFile FileSM("mock_file_name.cpp", NullTerminatedCode);
 auto &SM = FileSM.get();
 for (const auto &Tok : syntax::tokenize(SM.getMainFileID(), SM, LangOpts))
 Action(Tok, SM);
 }
 if (Tok.kind() != tok::identifier || Tok.text(SM) != Identifier)
 return;
 Ranges.push_back(halfOpenToRange(SM, Tok.range(SM).toCharRange(SM)));
 });
 return Ranges;
+}
+bool isKeyword(llvm::StringRef NewName, const LangOptions &LangOpts) {
+// Keywords are initialized in constructor.
+clang::IdentifierTable KeywordsTable(LangOpts);
+return KeywordsTable.find(NewName) != KeywordsTable.end();
 }
 namespace {
 struct NamespaceEvent {
 enum {
 }
 });
 }
 // Returns the prefix namespaces of NS: {"" ... NS}.
-llvm::SmallVector<llvm::StringRef, 8> ancestorNamespaces(llvm::StringRef NS) {
+llvm::SmallVector<llvm::StringRef> ancestorNamespaces(llvm::StringRef NS) {
-llvm::SmallVector<llvm::StringRef, 8> Results;
+llvm::SmallVector<llvm::StringRef> Results;
 Results.push_back(NS.take_front(0));
 NS.split(Results, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
 for (llvm::StringRef &R : Results)
 R = NS.take_front(R.end() - NS.begin());
 return Results;
 const auto &SM = PP.getSourceManager();
 IdentifierInfo *IdentifierInfo = PP.getIdentifierInfo(SpelledTok.text(SM));
 if (!IdentifierInfo || !IdentifierInfo->hadMacroDefinition())
 return None;
-// Get the definition just before the searched location so that a macro
+// We need to take special care to handle #define and #undef.
-// referenced in a '#undef MACRO' can still be found. Note that we only do
+// Preprocessor::getMacroDefinitionAtLoc() only considers a macro
-// that if Loc is not pointing at start of file.
+// definition to be in scope *after* the location of the macro name in a
-if (SM.getLocForStartOfFile(SM.getFileID(Loc)) != Loc)
+// #define that introduces it, and *before* the location of the macro name
-Loc = Loc.getLocWithOffset(-1);
+// in an #undef that undefines it. To handle these cases, we check for
-MacroDefinition MacroDef = PP.getMacroDefinitionAtLoc(IdentifierInfo, Loc);
+// the macro being in scope either just after or just before the location
-if (auto *MI = MacroDef.getMacroInfo())
+// of the token. In getting the location before, we also take care to check
-return DefinedMacro{IdentifierInfo->getName(), MI};
+// for start-of-file.
-return None;
+FileID FID = SM.getFileID(Loc);
+assert(Loc != SM.getLocForEndOfFile(FID));
+SourceLocation JustAfterToken = Loc.getLocWithOffset(1);
+auto *MacroInfo =
+PP.getMacroDefinitionAtLoc(IdentifierInfo, JustAfterToken).getMacroInfo();
+if (!MacroInfo && SM.getLocForStartOfFile(FID) != Loc) {
+SourceLocation JustBeforeToken = Loc.getLocWithOffset(-1);
+MacroInfo = PP.getMacroDefinitionAtLoc(IdentifierInfo, JustBeforeToken)
+.getMacroInfo();
+}
+if (!MacroInfo) {
+return None;
+}
+return DefinedMacro{
+IdentifierInfo->getName(), MacroInfo,
+translatePreamblePatchLocation(MacroInfo->getDefinitionLoc(), SM)};
 }
 llvm::Expected<std::string> Edit::apply() const {
 return tooling::applyAllReplacements(InitialCode, Replacements);
 }
 llvm::Error reformatEdit(Edit &E, const format::FormatStyle &Style) {
 if (auto NewEdits = cleanupAndFormat(E.InitialCode, E.Replacements, Style))
 E.Replacements = std::move(*NewEdits);
 else
 return NewEdits.takeError();
+return llvm::Error::success();
+}
+llvm::Error applyChange(std::string &Contents,
+const TextDocumentContentChangeEvent &Change) {
+if (!Change.range) {
+Contents = Change.text;
+return llvm::Error::success();
+}
+const Position &Start = Change.range->start;
+llvm::Expected<size_t> StartIndex = positionToOffset(Contents, Start, false);
+if (!StartIndex)
+return StartIndex.takeError();
+const Position &End = Change.range->end;
+llvm::Expected<size_t> EndIndex = positionToOffset(Contents, End, false);
+if (!EndIndex)
+return EndIndex.takeError();
+if (*EndIndex < *StartIndex)
+return error(llvm::errc::invalid_argument,
+"Range's end position ({0}) is before start position ({1})",
+End, Start);
+// Since the range length between two LSP positions is dependent on the
+// contents of the buffer we compute the range length between the start and
+// end position ourselves and compare it to the range length of the LSP
+// message to verify the buffers of the client and server are in sync.
+// EndIndex and StartIndex are in bytes, but Change.rangeLength is in UTF-16
+// code units.
+ssize_t ComputedRangeLength =
+lspLength(Contents.substr(*StartIndex, *EndIndex - *StartIndex));
+if (Change.rangeLength && ComputedRangeLength != *Change.rangeLength)
+return error(llvm::errc::invalid_argument,
+"Change's rangeLength ({0}) doesn't match the "
+"computed range length ({1}).",
+*Change.rangeLength, ComputedRangeLength);
+Contents.replace(*StartIndex, *EndIndex - *StartIndex, Change.text);
 return llvm::Error::success();
 }
 EligibleRegion getEligiblePoints(llvm::StringRef Code,
 llvm::StringRef FullyQualifiedName,

Mercurial > hg > CbC > CbC_llvm

comparison clang-tools-extra/clangd/SourceCode.cpp @ 221:79ff65ed7e25