diff clang-tools-extra/clangd/SourceCode.cpp @ 221:79ff65ed7e25

LLVM12 Original
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Tue, 15 Jun 2021 19:15:29 +0900
parents 0572611fdcc8
children c4bab56944e8
line wrap: on
line diff
--- a/clang-tools-extra/clangd/SourceCode.cpp	Tue Jun 15 19:13:43 2021 +0900
+++ b/clang-tools-extra/clangd/SourceCode.cpp	Tue Jun 15 19:15:29 2021 +0900
@@ -8,10 +8,12 @@
 #include "SourceCode.h"
 
 #include "FuzzyMatch.h"
+#include "Preamble.h"
 #include "Protocol.h"
 #include "refactor/Tweak.h"
 #include "support/Context.h"
 #include "support/Logger.h"
+#include "support/Threading.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/SourceLocation.h"
@@ -54,8 +56,14 @@
 // Iterates over unicode codepoints in the (UTF-8) string. For each,
 // invokes CB(UTF-8 length, UTF-16 length), and breaks if it returns true.
 // Returns true if CB returned true, false if we hit the end of string.
+//
+// If the string is not valid UTF-8, we log this error and "decode" the
+// text in some arbitrary way. This is pretty sad, but this tends to happen deep
+// within indexing of headers where clang misdetected the encoding, and
+// propagating the error all the way back up is (probably?) not worth it.
 template <typename Callback>
 static bool iterateCodepoints(llvm::StringRef U8, const Callback &CB) {
+  bool LoggedInvalid = false;
   // A codepoint takes two UTF-16 code unit if it's astral (outside BMP).
   // Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx.
   for (size_t I = 0; I < U8.size();) {
@@ -69,9 +77,20 @@
     // This convenient property of UTF-8 holds for all non-ASCII characters.
     size_t UTF8Length = llvm::countLeadingOnes(C);
     // 0xxx is ASCII, handled above. 10xxx is a trailing byte, invalid here.
-    // 11111xxx is not valid UTF-8 at all. Assert because it's probably our bug.
-    assert((UTF8Length >= 2 && UTF8Length <= 4) &&
-           "Invalid UTF-8, or transcoding bug?");
+    // 11111xxx is not valid UTF-8 at all, maybe some ISO-8859-*.
+    if (LLVM_UNLIKELY(UTF8Length < 2 || UTF8Length > 4)) {
+      if (!LoggedInvalid) {
+        elog("File has invalid UTF-8 near offset {0}: {1}", I, llvm::toHex(U8));
+        LoggedInvalid = true;
+      }
+      // We can't give a correct result, but avoid returning something wild.
+      // Pretend this is a valid ASCII byte, for lack of better options.
+      // (Too late to get ISO-8859-* right, we've skipped some bytes already).
+      if (CB(1, 1))
+        return true;
+      ++I;
+      continue;
+    }
     I += UTF8Length; // Skip over all trailing bytes.
     // A codepoint takes two UTF-16 code unit if it's astral (outside BMP).
     // Astral codepoints are encoded as 4 bytes in UTF-8 (11110xxx ...)
@@ -156,20 +175,17 @@
 llvm::Expected<size_t> positionToOffset(llvm::StringRef Code, Position P,
                                         bool AllowColumnsBeyondLineLength) {
   if (P.line < 0)
-    return llvm::make_error<llvm::StringError>(
-        llvm::formatv("Line value can't be negative ({0})", P.line),
-        llvm::errc::invalid_argument);
+    return error(llvm::errc::invalid_argument,
+                 "Line value can't be negative ({0})", P.line);
   if (P.character < 0)
-    return llvm::make_error<llvm::StringError>(
-        llvm::formatv("Character value can't be negative ({0})", P.character),
-        llvm::errc::invalid_argument);
+    return error(llvm::errc::invalid_argument,
+                 "Character value can't be negative ({0})", P.character);
   size_t StartOfLine = 0;
   for (int I = 0; I != P.line; ++I) {
     size_t NextNL = Code.find('\n', StartOfLine);
     if (NextNL == llvm::StringRef::npos)
-      return llvm::make_error<llvm::StringError>(
-          llvm::formatv("Line value is out of range ({0})", P.line),
-          llvm::errc::invalid_argument);
+      return error(llvm::errc::invalid_argument,
+                   "Line value is out of range ({0})", P.line);
     StartOfLine = NextNL + 1;
   }
   StringRef Line =
@@ -179,10 +195,9 @@
   bool Valid;
   size_t ByteInLine = measureUnits(Line, P.character, lspEncoding(), Valid);
   if (!Valid && !AllowColumnsBeyondLineLength)
-    return llvm::make_error<llvm::StringError>(
-        llvm::formatv("{0} offset {1} is invalid for line {2}", lspEncoding(),
-                      P.character, P.line),
-        llvm::errc::invalid_argument);
+    return error(llvm::errc::invalid_argument,
+                 "{0} offset {1} is invalid for line {2}", lspEncoding(),
+                 P.character, P.line);
   return StartOfLine + ByteInLine;
 }
 
@@ -430,9 +445,8 @@
 
 llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R) {
   assert(isValidFileRange(SM, R));
-  bool Invalid = false;
-  auto *Buf = SM.getBuffer(SM.getFileID(R.getBegin()), &Invalid);
-  assert(!Invalid);
+  auto Buf = SM.getBufferOrNone(SM.getFileID(R.getBegin()));
+  assert(Buf);
 
   size_t BeginOffset = SM.getFileOffset(R.getBegin());
   size_t EndOffset = SM.getFileOffset(R.getEnd());
@@ -441,7 +455,7 @@
 
 llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM,
                                                         Position P) {
-  llvm::StringRef Code = SM.getBuffer(SM.getMainFileID())->getBuffer();
+  llvm::StringRef Code = SM.getBufferOrFake(SM.getMainFileID()).getBuffer();
   auto Offset =
       positionToOffset(Code, P, /*AllowColumnBeyondLineLength=*/false);
   if (!Offset)
@@ -558,9 +572,10 @@
 
 format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
                                           llvm::StringRef Content,
-                                          llvm::vfs::FileSystem *FS) {
+                                          const ThreadsafeFS &TFS) {
   auto Style = format::getStyle(format::DefaultFormatStyle, File,
-                                format::DefaultFallbackStyle, Content, FS);
+                                format::DefaultFallbackStyle, Content,
+                                TFS.view(/*CWD=*/llvm::None).get());
   if (!Style) {
     log("getStyle() failed for file {0}: {1}. Fallback is LLVM style.", File,
         Style.takeError());
@@ -584,7 +599,7 @@
         Action) {
   // FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated!
   std::string NullTerminatedCode = Code.str();
-  SourceManagerForFile FileSM("dummy.cpp", NullTerminatedCode);
+  SourceManagerForFile FileSM("mock_file_name.cpp", NullTerminatedCode);
   auto &SM = FileSM.get();
   for (const auto &Tok : syntax::tokenize(SM.getMainFileID(), SM, LangOpts))
     Action(Tok, SM);
@@ -617,6 +632,12 @@
   return Ranges;
 }
 
+bool isKeyword(llvm::StringRef NewName, const LangOptions &LangOpts) {
+  // A fresh table holds only what its constructor adds: LangOpts' keywords.
+  clang::IdentifierTable KeywordsTable(LangOpts);
+  return KeywordsTable.find(NewName) != KeywordsTable.end(); // Hit => keyword.
+}
+
 namespace {
 struct NamespaceEvent {
   enum {
@@ -758,8 +779,8 @@
 }
 
 // Returns the prefix namespaces of NS: {"" ... NS}.
-llvm::SmallVector<llvm::StringRef, 8> ancestorNamespaces(llvm::StringRef NS) {
-  llvm::SmallVector<llvm::StringRef, 8> Results;
+llvm::SmallVector<llvm::StringRef> ancestorNamespaces(llvm::StringRef NS) {
+  llvm::SmallVector<llvm::StringRef> Results;
   Results.push_back(NS.take_front(0));
   NS.split(Results, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
   for (llvm::StringRef &R : Results)
@@ -954,15 +975,30 @@
   if (!IdentifierInfo || !IdentifierInfo->hadMacroDefinition())
     return None;
 
-  // Get the definition just before the searched location so that a macro
-  // referenced in a '#undef MACRO' can still be found. Note that we only do
-  // that if Loc is not pointing at start of file.
-  if (SM.getLocForStartOfFile(SM.getFileID(Loc)) != Loc)
-    Loc = Loc.getLocWithOffset(-1);
-  MacroDefinition MacroDef = PP.getMacroDefinitionAtLoc(IdentifierInfo, Loc);
-  if (auto *MI = MacroDef.getMacroInfo())
-    return DefinedMacro{IdentifierInfo->getName(), MI};
-  return None;
+  // We need to take special care to handle #define and #undef.
+  // Preprocessor::getMacroDefinitionAtLoc() only considers a macro
+  // definition to be in scope *after* the location of the macro name in a
+  // #define that introduces it, and *before* the location of the macro name
+  // in an #undef that undefines it. To handle these cases, we check for
+  // the macro being in scope either just after or just before the location
+  // of the token. In getting the location before, we also take care to check
+  // for start-of-file.
+  FileID FID = SM.getFileID(Loc);
+  assert(Loc != SM.getLocForEndOfFile(FID));
+  SourceLocation JustAfterToken = Loc.getLocWithOffset(1);
+  auto *MacroInfo =
+      PP.getMacroDefinitionAtLoc(IdentifierInfo, JustAfterToken).getMacroInfo();
+  if (!MacroInfo && SM.getLocForStartOfFile(FID) != Loc) {
+    SourceLocation JustBeforeToken = Loc.getLocWithOffset(-1);
+    MacroInfo = PP.getMacroDefinitionAtLoc(IdentifierInfo, JustBeforeToken)
+                    .getMacroInfo();
+  }
+  if (!MacroInfo) {
+    return None;
+  }
+  return DefinedMacro{
+      IdentifierInfo->getName(), MacroInfo,
+      translatePreamblePatchLocation(MacroInfo->getDefinitionLoc(), SM)};
 }
 
 llvm::Expected<std::string> Edit::apply() const {
@@ -1017,6 +1053,49 @@
   return llvm::Error::success();
 }
 
+llvm::Error applyChange(std::string &Contents, // Applies Change in place.
+                        const TextDocumentContentChangeEvent &Change) {
+  if (!Change.range) { // No range: Change.text replaces the whole buffer.
+    Contents = Change.text;
+    return llvm::Error::success();
+  }
+  // Ranged edit: every error return below precedes any write to Contents.
+  const Position &Start = Change.range->start;
+  llvm::Expected<size_t> StartIndex = positionToOffset(Contents, Start, false);
+  if (!StartIndex) // Negative/out-of-range line, or invalid column.
+    return StartIndex.takeError();
+
+  const Position &End = Change.range->end;
+  llvm::Expected<size_t> EndIndex = positionToOffset(Contents, End, false);
+  if (!EndIndex) // Same failure modes as for Start.
+    return EndIndex.takeError();
+
+  if (*EndIndex < *StartIndex) // Reversed range: refuse rather than swap.
+    return error(llvm::errc::invalid_argument,
+                 "Range's end position ({0}) is before start position ({1})",
+                 End, Start);
+
+  // The length of a range between two LSP positions depends on the buffer
+  // contents, so recompute it from our copy and compare it with the client's
+  // rangeLength (when one was sent): a mismatch means the client's and the
+  // server's buffers are out of sync.
+
+  // EndIndex and StartIndex are byte offsets, but Change.rangeLength is in
+  // UTF-16 code units, hence lspLength() rather than a plain subtraction.
+  ssize_t ComputedRangeLength =
+      lspLength(Contents.substr(*StartIndex, *EndIndex - *StartIndex));
+
+  if (Change.rangeLength && ComputedRangeLength != *Change.rangeLength)
+    return error(llvm::errc::invalid_argument,
+                 "Change's rangeLength ({0}) doesn't match the "
+                 "computed range length ({1}).",
+                 *Change.rangeLength, ComputedRangeLength);
+
+  Contents.replace(*StartIndex, *EndIndex - *StartIndex, Change.text);
+
+  return llvm::Error::success();
+}
+
 EligibleRegion getEligiblePoints(llvm::StringRef Code,
                                  llvm::StringRef FullyQualifiedName,
                                  const LangOptions &LangOpts) {