Mercurial > hg > CbC > CbC_llvm
diff clang-tools-extra/clangd/SemanticHighlighting.cpp @ 150:1d019706d866
LLVM10
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 15:10:13 +0900 |
parents | |
children | 0572611fdcc8 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clang-tools-extra/clangd/SemanticHighlighting.cpp Thu Feb 13 15:10:13 2020 +0900 @@ -0,0 +1,554 @@ +//===--- SemanticHighlighting.cpp - ------------------------- ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SemanticHighlighting.h" +#include "FindTarget.h" +#include "Logger.h" +#include "ParsedAST.h" +#include "Protocol.h" +#include "SourceCode.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclarationName.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/Type.h" +#include "clang/AST/TypeLoc.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Casting.h" +#include <algorithm> + +namespace clang { +namespace clangd { +namespace { + +/// Some names are not written in the source code and cannot be highlighted, +/// e.g. anonymous classes. This function detects those cases. +bool canHighlightName(DeclarationName Name) { + if (Name.getNameKind() == DeclarationName::CXXConstructorName || + Name.getNameKind() == DeclarationName::CXXUsingDirective) + return true; + auto *II = Name.getAsIdentifierInfo(); + return II && !II->getName().empty(); +} + +llvm::Optional<HighlightingKind> kindForType(const Type *TP); +llvm::Optional<HighlightingKind> kindForDecl(const NamedDecl *D) { + if (auto *USD = dyn_cast<UsingShadowDecl>(D)) { + if (auto *Target = USD->getTargetDecl()) + D = Target; + } + if (auto *TD = dyn_cast<TemplateDecl>(D)) { + if (auto *Templated = TD->getTemplatedDecl()) + D = Templated; + } + if (auto *TD = dyn_cast<TypedefNameDecl>(D)) { + // We try to highlight typedefs as their underlying type. + if (auto K = kindForType(TD->getUnderlyingType().getTypePtrOrNull())) + return K; + // And fallback to a generic kind if this fails. + return HighlightingKind::Typedef; + } + // We highlight class decls, constructor decls and destructor decls as + // `Class` type. The destructor decls are handled in `VisitTagTypeLoc` (we + // will visit a TypeLoc where the underlying Type is a CXXRecordDecl). + if (auto *RD = llvm::dyn_cast<RecordDecl>(D)) { + // We don't want to highlight lambdas like classes. + if (RD->isLambda()) + return llvm::None; + return HighlightingKind::Class; + } + if (isa<ClassTemplateDecl>(D) || isa<RecordDecl>(D) || + isa<CXXConstructorDecl>(D)) + return HighlightingKind::Class; + if (auto *MD = dyn_cast<CXXMethodDecl>(D)) + return MD->isStatic() ? HighlightingKind::StaticMethod + : HighlightingKind::Method; + if (isa<FieldDecl>(D)) + return HighlightingKind::Field; + if (isa<EnumDecl>(D)) + return HighlightingKind::Enum; + if (isa<EnumConstantDecl>(D)) + return HighlightingKind::EnumConstant; + if (isa<ParmVarDecl>(D)) + return HighlightingKind::Parameter; + if (auto *VD = dyn_cast<VarDecl>(D)) + return VD->isStaticDataMember() + ? HighlightingKind::StaticField + : VD->isLocalVarDecl() ? HighlightingKind::LocalVariable + : HighlightingKind::Variable; + if (isa<BindingDecl>(D)) + return HighlightingKind::Variable; + if (isa<FunctionDecl>(D)) + return HighlightingKind::Function; + if (isa<NamespaceDecl>(D) || isa<NamespaceAliasDecl>(D) || + isa<UsingDirectiveDecl>(D)) + return HighlightingKind::Namespace; + if (isa<TemplateTemplateParmDecl>(D) || isa<TemplateTypeParmDecl>(D) || + isa<NonTypeTemplateParmDecl>(D)) + return HighlightingKind::TemplateParameter; + if (isa<ConceptDecl>(D)) + return HighlightingKind::Concept; + return llvm::None; +} +llvm::Optional<HighlightingKind> kindForType(const Type *TP) { + if (!TP) + return llvm::None; + if (TP->isBuiltinType()) // Builtins are special, they do not have decls. + return HighlightingKind::Primitive; + if (auto *TD = dyn_cast<TemplateTypeParmType>(TP)) + return kindForDecl(TD->getDecl()); + if (auto *TD = TP->getAsTagDecl()) + return kindForDecl(TD); + return llvm::None; +} + +llvm::Optional<HighlightingKind> kindForReference(const ReferenceLoc &R) { + llvm::Optional<HighlightingKind> Result; + for (const NamedDecl *Decl : R.Targets) { + if (!canHighlightName(Decl->getDeclName())) + return llvm::None; + auto Kind = kindForDecl(Decl); + if (!Kind || (Result && Kind != Result)) + return llvm::None; + Result = Kind; + } + return Result; +} + +/// Consumes source locations and maps them to text ranges for highlightings. +class HighlightingsBuilder { +public: + HighlightingsBuilder(const SourceManager &SourceMgr, + const LangOptions &LangOpts) + : SourceMgr(SourceMgr), LangOpts(LangOpts) {} + + void addToken(HighlightingToken T) { Tokens.push_back(T); } + + void addToken(SourceLocation Loc, HighlightingKind Kind) { + if (Loc.isInvalid()) + return; + if (Loc.isMacroID()) { + // Only intereseted in highlighting arguments in macros (DEF_X(arg)). + if (!SourceMgr.isMacroArgExpansion(Loc)) + return; + Loc = SourceMgr.getSpellingLoc(Loc); + } + + // Non top level decls that are included from a header are not filtered by + // topLevelDecls. (example: method declarations being included from + // another file for a class from another file). + // There are also cases with macros where the spelling loc will not be in + // the main file and the highlighting would be incorrect. + if (!isInsideMainFile(Loc, SourceMgr)) + return; + + auto Range = getTokenRange(SourceMgr, LangOpts, Loc); + if (!Range) { + // R should always have a value, if it doesn't something is very wrong. + elog("Tried to add semantic token with an invalid range"); + return; + } + Tokens.push_back(HighlightingToken{Kind, *Range}); + } + + std::vector<HighlightingToken> collect(ParsedAST &AST) && { + // Initializer lists can give duplicates of tokens, therefore all tokens + // must be deduplicated. + llvm::sort(Tokens); + auto Last = std::unique(Tokens.begin(), Tokens.end()); + Tokens.erase(Last, Tokens.end()); + + // Macros can give tokens that have the same source range but conflicting + // kinds. In this case all tokens sharing this source range should be + // removed. + std::vector<HighlightingToken> NonConflicting; + NonConflicting.reserve(Tokens.size()); + for (ArrayRef<HighlightingToken> TokRef = Tokens; !TokRef.empty();) { + ArrayRef<HighlightingToken> Conflicting = + TokRef.take_while([&](const HighlightingToken &T) { + // TokRef is guaranteed at least one element here because otherwise + // this predicate would never fire. + return T.R == TokRef.front().R; + }); + // If there is exactly one token with this range it's non conflicting and + // should be in the highlightings. + if (Conflicting.size() == 1) + NonConflicting.push_back(TokRef.front()); + // TokRef[Conflicting.size()] is the next token with a different range (or + // the end of the Tokens). + TokRef = TokRef.drop_front(Conflicting.size()); + } + // Add tokens indicating lines skipped by the preprocessor. + for (const Range &R : AST.getMacros().SkippedRanges) { + // Create one token for each line in the skipped range, so it works + // with line-based diffing. + assert(R.start.line <= R.end.line); + for (int Line = R.start.line; Line < R.end.line; ++Line) { + // Don't bother computing the offset for the end of the line, just use + // zero. The client will treat this highlighting kind specially, and + // highlight the entire line visually (i.e. not just to where the text + // on the line ends, but to the end of the screen). + NonConflicting.push_back({HighlightingKind::InactiveCode, + {Position{Line, 0}, Position{Line, 0}}}); + } + } + // Re-sort the tokens because that's what the diffing expects. + llvm::sort(NonConflicting); + return NonConflicting; + } + +private: + const SourceManager &SourceMgr; + const LangOptions &LangOpts; + std::vector<HighlightingToken> Tokens; +}; + +/// Produces highlightings, which are not captured by findExplicitReferences, +/// e.g. highlights dependent names and 'auto' as the underlying type. +class CollectExtraHighlightings + : public RecursiveASTVisitor<CollectExtraHighlightings> { +public: + CollectExtraHighlightings(HighlightingsBuilder &H) : H(H) {} + + bool VisitDecltypeTypeLoc(DecltypeTypeLoc L) { + if (auto K = kindForType(L.getTypePtr())) + H.addToken(L.getBeginLoc(), *K); + return true; + } + + bool VisitDeclaratorDecl(DeclaratorDecl *D) { + auto *AT = D->getType()->getContainedAutoType(); + if (!AT) + return true; + if (auto K = kindForType(AT->getDeducedType().getTypePtrOrNull())) + H.addToken(D->getTypeSpecStartLoc(), *K); + return true; + } + + bool VisitOverloadExpr(OverloadExpr *E) { + if (!E->decls().empty()) + return true; // handled by findExplicitReferences. + H.addToken(E->getNameLoc(), HighlightingKind::DependentName); + return true; + } + + bool VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr *E) { + H.addToken(E->getMemberNameInfo().getLoc(), + HighlightingKind::DependentName); + return true; + } + + bool VisitDependentScopeDeclRefExpr(DependentScopeDeclRefExpr *E) { + H.addToken(E->getNameInfo().getLoc(), HighlightingKind::DependentName); + return true; + } + + bool VisitDependentNameTypeLoc(DependentNameTypeLoc L) { + H.addToken(L.getNameLoc(), HighlightingKind::DependentType); + return true; + } + + bool VisitDependentTemplateSpecializationTypeLoc( + DependentTemplateSpecializationTypeLoc L) { + H.addToken(L.getTemplateNameLoc(), HighlightingKind::DependentType); + return true; + } + + // findExplicitReferences will walk nested-name-specifiers and + // find anything that can be resolved to a Decl. However, non-leaf + // components of nested-name-specifiers which are dependent names + // (kind "Identifier") cannot be resolved to a decl, so we visit + // them here. + bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc Q) { + if (NestedNameSpecifier *NNS = Q.getNestedNameSpecifier()) { + if (NNS->getKind() == NestedNameSpecifier::Identifier) + H.addToken(Q.getLocalBeginLoc(), HighlightingKind::DependentType); + } + return RecursiveASTVisitor::TraverseNestedNameSpecifierLoc(Q); + } + +private: + HighlightingsBuilder &H; +}; + +// Encode binary data into base64. +// This was copied from compiler-rt/lib/fuzzer/FuzzerUtil.cpp. +// FIXME: Factor this out into llvm/Support? +std::string encodeBase64(const llvm::SmallVectorImpl<char> &Bytes) { + static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + std::string Res; + size_t I; + for (I = 0; I + 2 < Bytes.size(); I += 3) { + uint32_t X = (Bytes[I] << 16) + (Bytes[I + 1] << 8) + Bytes[I + 2]; + Res += Table[(X >> 18) & 63]; + Res += Table[(X >> 12) & 63]; + Res += Table[(X >> 6) & 63]; + Res += Table[X & 63]; + } + if (I + 1 == Bytes.size()) { + uint32_t X = (Bytes[I] << 16); + Res += Table[(X >> 18) & 63]; + Res += Table[(X >> 12) & 63]; + Res += "=="; + } else if (I + 2 == Bytes.size()) { + uint32_t X = (Bytes[I] << 16) + (Bytes[I + 1] << 8); + Res += Table[(X >> 18) & 63]; + Res += Table[(X >> 12) & 63]; + Res += Table[(X >> 6) & 63]; + Res += "="; + } + return Res; +} + +void write32be(uint32_t I, llvm::raw_ostream &OS) { + std::array<char, 4> Buf; + llvm::support::endian::write32be(Buf.data(), I); + OS.write(Buf.data(), Buf.size()); +} + +void write16be(uint16_t I, llvm::raw_ostream &OS) { + std::array<char, 2> Buf; + llvm::support::endian::write16be(Buf.data(), I); + OS.write(Buf.data(), Buf.size()); +} + +// Get the highlightings on \c Line where the first entry of line is at \c +// StartLineIt. If it is not at \c StartLineIt an empty vector is returned. +ArrayRef<HighlightingToken> +takeLine(ArrayRef<HighlightingToken> AllTokens, + ArrayRef<HighlightingToken>::iterator StartLineIt, int Line) { + return ArrayRef<HighlightingToken>(StartLineIt, AllTokens.end()) + .take_while([Line](const HighlightingToken &Token) { + return Token.R.start.line == Line; + }); +} +} // namespace + +std::vector<HighlightingToken> getSemanticHighlightings(ParsedAST &AST) { + auto &C = AST.getASTContext(); + // Add highlightings for AST nodes. + HighlightingsBuilder Builder(AST.getSourceManager(), C.getLangOpts()); + // Highlight 'decltype' and 'auto' as their underlying types. + CollectExtraHighlightings(Builder).TraverseAST(C); + // Highlight all decls and references coming from the AST. + findExplicitReferences(C, [&](ReferenceLoc R) { + if (auto Kind = kindForReference(R)) + Builder.addToken(R.NameLoc, *Kind); + }); + // Add highlightings for macro references. + for (const auto &SIDToRefs : AST.getMacros().MacroRefs) { + for (const auto &M : SIDToRefs.second) + Builder.addToken({HighlightingKind::Macro, M}); + } + for (const auto &M : AST.getMacros().UnknownMacros) + Builder.addToken({HighlightingKind::Macro, M}); + + return std::move(Builder).collect(AST); +} + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, HighlightingKind K) { + switch (K) { + case HighlightingKind::Variable: + return OS << "Variable"; + case HighlightingKind::LocalVariable: + return OS << "LocalVariable"; + case HighlightingKind::Parameter: + return OS << "Parameter"; + case HighlightingKind::Function: + return OS << "Function"; + case HighlightingKind::Method: + return OS << "Method"; + case HighlightingKind::StaticMethod: + return OS << "StaticMethod"; + case HighlightingKind::Field: + return OS << "Field"; + case HighlightingKind::StaticField: + return OS << "StaticField"; + case HighlightingKind::Class: + return OS << "Class"; + case HighlightingKind::Enum: + return OS << "Enum"; + case HighlightingKind::EnumConstant: + return OS << "EnumConstant"; + case HighlightingKind::Typedef: + return OS << "Typedef"; + case HighlightingKind::DependentType: + return OS << "DependentType"; + case HighlightingKind::DependentName: + return OS << "DependentName"; + case HighlightingKind::Namespace: + return OS << "Namespace"; + case HighlightingKind::TemplateParameter: + return OS << "TemplateParameter"; + case HighlightingKind::Concept: + return OS << "Concept"; + case HighlightingKind::Primitive: + return OS << "Primitive"; + case HighlightingKind::Macro: + return OS << "Macro"; + case HighlightingKind::InactiveCode: + return OS << "InactiveCode"; + } + llvm_unreachable("invalid HighlightingKind"); +} + +std::vector<LineHighlightings> +diffHighlightings(ArrayRef<HighlightingToken> New, + ArrayRef<HighlightingToken> Old) { + assert(std::is_sorted(New.begin(), New.end()) && + "New must be a sorted vector"); + assert(std::is_sorted(Old.begin(), Old.end()) && + "Old must be a sorted vector"); + + // FIXME: There's an edge case when tokens span multiple lines. If the first + // token on the line started on a line above the current one and the rest of + // the line is the equal to the previous one than we will remove all + // highlights but the ones for the token spanning multiple lines. This means + // that when we get into the LSP layer the only highlights that will be + // visible are the ones for the token spanning multiple lines. + // Example: + // EndOfMultilineToken Token Token Token + // If "Token Token Token" don't differ from previously the line is + // incorrectly removed. Suggestion to fix is to separate any multiline tokens + // into one token for every line it covers. This requires reading from the + // file buffer to figure out the length of each line though. + std::vector<LineHighlightings> DiffedLines; + // ArrayRefs to the current line in the highlightings. + ArrayRef<HighlightingToken> NewLine(New.begin(), + /*length*/ static_cast<size_t>(0)); + ArrayRef<HighlightingToken> OldLine(Old.begin(), + /*length*/ static_cast<size_t>(0)); + auto NewEnd = New.end(); + auto OldEnd = Old.end(); + auto NextLineNumber = [&]() { + int NextNew = NewLine.end() != NewEnd ? NewLine.end()->R.start.line + : std::numeric_limits<int>::max(); + int NextOld = OldLine.end() != OldEnd ? OldLine.end()->R.start.line + : std::numeric_limits<int>::max(); + return std::min(NextNew, NextOld); + }; + + for (int LineNumber = 0; NewLine.end() < NewEnd || OldLine.end() < OldEnd; + LineNumber = NextLineNumber()) { + NewLine = takeLine(New, NewLine.end(), LineNumber); + OldLine = takeLine(Old, OldLine.end(), LineNumber); + if (NewLine != OldLine) { + DiffedLines.push_back({LineNumber, NewLine, /*IsInactive=*/false}); + + // Turn a HighlightingKind::InactiveCode token into the IsInactive flag. + auto &AddedLine = DiffedLines.back(); + llvm::erase_if(AddedLine.Tokens, [&](const HighlightingToken &T) { + if (T.Kind == HighlightingKind::InactiveCode) { + AddedLine.IsInactive = true; + return true; + } + return false; + }); + } + } + + return DiffedLines; +} + +bool operator==(const HighlightingToken &L, const HighlightingToken &R) { + return std::tie(L.R, L.Kind) == std::tie(R.R, R.Kind); +} +bool operator<(const HighlightingToken &L, const HighlightingToken &R) { + return std::tie(L.R, L.Kind) < std::tie(R.R, R.Kind); +} +bool operator==(const LineHighlightings &L, const LineHighlightings &R) { + return std::tie(L.Line, L.Tokens) == std::tie(R.Line, R.Tokens); +} + +std::vector<SemanticHighlightingInformation> +toSemanticHighlightingInformation(llvm::ArrayRef<LineHighlightings> Tokens) { + if (Tokens.size() == 0) + return {}; + + // FIXME: Tokens might be multiple lines long (block comments) in this case + // this needs to add multiple lines for those tokens. + std::vector<SemanticHighlightingInformation> Lines; + Lines.reserve(Tokens.size()); + for (const auto &Line : Tokens) { + llvm::SmallVector<char, 128> LineByteTokens; + llvm::raw_svector_ostream OS(LineByteTokens); + for (const auto &Token : Line.Tokens) { + // Writes the token to LineByteTokens in the byte format specified by the + // LSP proposal. Described below. + // |<---- 4 bytes ---->|<-- 2 bytes -->|<--- 2 bytes -->| + // | character | length | index | + + write32be(Token.R.start.character, OS); + write16be(Token.R.end.character - Token.R.start.character, OS); + write16be(static_cast<int>(Token.Kind), OS); + } + + Lines.push_back({Line.Line, encodeBase64(LineByteTokens), Line.IsInactive}); + } + + return Lines; +} + +llvm::StringRef toTextMateScope(HighlightingKind Kind) { + // FIXME: Add scopes for C and Objective C. + switch (Kind) { + case HighlightingKind::Function: + return "entity.name.function.cpp"; + case HighlightingKind::Method: + return "entity.name.function.method.cpp"; + case HighlightingKind::StaticMethod: + return "entity.name.function.method.static.cpp"; + case HighlightingKind::Variable: + return "variable.other.cpp"; + case HighlightingKind::LocalVariable: + return "variable.other.local.cpp"; + case HighlightingKind::Parameter: + return "variable.parameter.cpp"; + case HighlightingKind::Field: + return "variable.other.field.cpp"; + case HighlightingKind::StaticField: + return "variable.other.field.static.cpp"; + case HighlightingKind::Class: + return "entity.name.type.class.cpp"; + case HighlightingKind::Enum: + return "entity.name.type.enum.cpp"; + case HighlightingKind::EnumConstant: + return "variable.other.enummember.cpp"; + case HighlightingKind::Typedef: + return "entity.name.type.typedef.cpp"; + case HighlightingKind::DependentType: + return "entity.name.type.dependent.cpp"; + case HighlightingKind::DependentName: + return "entity.name.other.dependent.cpp"; + case HighlightingKind::Namespace: + return "entity.name.namespace.cpp"; + case HighlightingKind::TemplateParameter: + return "entity.name.type.template.cpp"; + case HighlightingKind::Concept: + return "entity.name.type.concept.cpp"; + case HighlightingKind::Primitive: + return "storage.type.primitive.cpp"; + case HighlightingKind::Macro: + return "entity.name.function.preprocessor.cpp"; + case HighlightingKind::InactiveCode: + return "meta.disabled"; + } + llvm_unreachable("unhandled HighlightingKind"); +} + +} // namespace clangd +} // namespace clang