Mercurial > hg > CbC > CbC_llvm
diff clang-tools-extra/clangd/Quality.cpp @ 150:1d019706d866
LLVM10
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 15:10:13 +0900 |
parents | |
children | 0572611fdcc8 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clang-tools-extra/clangd/Quality.cpp Thu Feb 13 15:10:13 2020 +0900 @@ -0,0 +1,505 @@ +//===--- Quality.cpp ---------------------------------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Quality.h" +#include "AST.h" +#include "FileDistance.h" +#include "SourceCode.h" +#include "URI.h" +#include "index/Symbol.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclTemplate.h" +#include "clang/AST/DeclVisitor.h" +#include "clang/Basic/CharInfo.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Sema/CodeCompleteConsumer.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cmath> + +namespace clang { +namespace clangd { +static bool isReserved(llvm::StringRef Name) { + // FIXME: Should we exclude _Bool and others recognized by the standard? + return Name.size() >= 2 && Name[0] == '_' && + (isUppercase(Name[1]) || Name[1] == '_'); +} + +static bool hasDeclInMainFile(const Decl &D) { + auto &SourceMgr = D.getASTContext().getSourceManager(); + for (auto *Redecl : D.redecls()) { + if (isInsideMainFile(Redecl->getLocation(), SourceMgr)) + return true; + } + return false; +} + +static bool hasUsingDeclInMainFile(const CodeCompletionResult &R) { + const auto &Context = R.Declaration->getASTContext(); + const auto &SourceMgr = Context.getSourceManager(); + if (R.ShadowDecl) { + if (isInsideMainFile(R.ShadowDecl->getLocation(), SourceMgr)) + return true; + } + return false; +} + +static SymbolQualitySignals::SymbolCategory categorize(const NamedDecl &ND) { + if (const auto *FD = dyn_cast<FunctionDecl>(&ND)) { + if (FD->isOverloadedOperator()) + return SymbolQualitySignals::Operator; + } + class Switch + : public ConstDeclVisitor<Switch, SymbolQualitySignals::SymbolCategory> { + public: +#define MAP(DeclType, Category) \ + SymbolQualitySignals::SymbolCategory Visit##DeclType(const DeclType *) { \ + return SymbolQualitySignals::Category; \ + } + MAP(NamespaceDecl, Namespace); + MAP(NamespaceAliasDecl, Namespace); + MAP(TypeDecl, Type); + MAP(TypeAliasTemplateDecl, Type); + MAP(ClassTemplateDecl, Type); + MAP(CXXConstructorDecl, Constructor); + MAP(CXXDestructorDecl, Destructor); + MAP(ValueDecl, Variable); + MAP(VarTemplateDecl, Variable); + MAP(FunctionDecl, Function); + MAP(FunctionTemplateDecl, Function); + MAP(Decl, Unknown); +#undef MAP + }; + return Switch().Visit(&ND); +} + +static SymbolQualitySignals::SymbolCategory +categorize(const CodeCompletionResult &R) { + if (R.Declaration) + return categorize(*R.Declaration); + if (R.Kind == CodeCompletionResult::RK_Macro) + return SymbolQualitySignals::Macro; + // Everything else is a keyword or a pattern. Patterns are mostly keywords + // too, except a few which we recognize by cursor kind. + switch (R.CursorKind) { + case CXCursor_CXXMethod: + return SymbolQualitySignals::Function; + case CXCursor_ModuleImportDecl: + return SymbolQualitySignals::Namespace; + case CXCursor_MacroDefinition: + return SymbolQualitySignals::Macro; + case CXCursor_TypeRef: + return SymbolQualitySignals::Type; + case CXCursor_MemberRef: + return SymbolQualitySignals::Variable; + case CXCursor_Constructor: + return SymbolQualitySignals::Constructor; + default: + return SymbolQualitySignals::Keyword; + } +} + +static SymbolQualitySignals::SymbolCategory +categorize(const index::SymbolInfo &D) { + switch (D.Kind) { + case index::SymbolKind::Namespace: + case index::SymbolKind::NamespaceAlias: + return SymbolQualitySignals::Namespace; + case index::SymbolKind::Macro: + return SymbolQualitySignals::Macro; + case index::SymbolKind::Enum: + case index::SymbolKind::Struct: + case index::SymbolKind::Class: + case index::SymbolKind::Protocol: + case index::SymbolKind::Extension: + case index::SymbolKind::Union: + case index::SymbolKind::TypeAlias: + return SymbolQualitySignals::Type; + case index::SymbolKind::Function: + case index::SymbolKind::ClassMethod: + case index::SymbolKind::InstanceMethod: + case index::SymbolKind::StaticMethod: + case index::SymbolKind::InstanceProperty: + case index::SymbolKind::ClassProperty: + case index::SymbolKind::StaticProperty: + case index::SymbolKind::ConversionFunction: + return SymbolQualitySignals::Function; + case index::SymbolKind::Destructor: + return SymbolQualitySignals::Destructor; + case index::SymbolKind::Constructor: + return SymbolQualitySignals::Constructor; + case index::SymbolKind::Variable: + case index::SymbolKind::Field: + case index::SymbolKind::EnumConstant: + case index::SymbolKind::Parameter: + return SymbolQualitySignals::Variable; + case index::SymbolKind::Using: + case index::SymbolKind::Module: + case index::SymbolKind::Unknown: + return SymbolQualitySignals::Unknown; + } + llvm_unreachable("Unknown index::SymbolKind"); +} + +static bool isInstanceMember(const NamedDecl *ND) { + if (!ND) + return false; + if (const auto *TP = dyn_cast<FunctionTemplateDecl>(ND)) + ND = TP->TemplateDecl::getTemplatedDecl(); + if (const auto *CM = dyn_cast<CXXMethodDecl>(ND)) + return !CM->isStatic(); + return isa<FieldDecl>(ND); // Note that static fields are VarDecl. +} + +static bool isInstanceMember(const index::SymbolInfo &D) { + switch (D.Kind) { + case index::SymbolKind::InstanceMethod: + case index::SymbolKind::InstanceProperty: + case index::SymbolKind::Field: + return true; + default: + return false; + } +} + +void SymbolQualitySignals::merge(const CodeCompletionResult &SemaCCResult) { + Deprecated |= (SemaCCResult.Availability == CXAvailability_Deprecated); + Category = categorize(SemaCCResult); + + if (SemaCCResult.Declaration) { + ImplementationDetail |= isImplementationDetail(SemaCCResult.Declaration); + if (auto *ID = SemaCCResult.Declaration->getIdentifier()) + ReservedName = ReservedName || isReserved(ID->getName()); + } else if (SemaCCResult.Kind == CodeCompletionResult::RK_Macro) + ReservedName = ReservedName || isReserved(SemaCCResult.Macro->getName()); +} + +void SymbolQualitySignals::merge(const Symbol &IndexResult) { + Deprecated |= (IndexResult.Flags & Symbol::Deprecated); + ImplementationDetail |= (IndexResult.Flags & Symbol::ImplementationDetail); + References = std::max(IndexResult.References, References); + Category = categorize(IndexResult.SymInfo); + ReservedName = ReservedName || isReserved(IndexResult.Name); +} + +float SymbolQualitySignals::evaluate() const { + float Score = 1; + + // This avoids a sharp gradient for tail symbols, and also neatly avoids the + // question of whether 0 references means a bad symbol or missing data. + if (References >= 10) { + // Use a sigmoid style boosting function, which flats out nicely for large + // numbers (e.g. 2.58 for 1M refererences). + // The following boosting function is equivalent to: + // m = 0.06 + // f = 12.0 + // boost = f * sigmoid(m * std::log(References)) - 0.5 * f + 0.59 + // Sample data points: (10, 1.00), (100, 1.41), (1000, 1.82), + // (10K, 2.21), (100K, 2.58), (1M, 2.94) + float S = std::pow(References, -0.06); + Score *= 6.0 * (1 - S) / (1 + S) + 0.59; + } + + if (Deprecated) + Score *= 0.1f; + if (ReservedName) + Score *= 0.1f; + if (ImplementationDetail) + Score *= 0.2f; + + switch (Category) { + case Keyword: // Often relevant, but misses most signals. + Score *= 4; // FIXME: important keywords should have specific boosts. + break; + case Type: + case Function: + case Variable: + Score *= 1.1f; + break; + case Namespace: + Score *= 0.8f; + break; + case Macro: + case Destructor: + case Operator: + Score *= 0.5f; + break; + case Constructor: // No boost constructors so they are after class types. + case Unknown: + break; + } + + return Score; +} + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const SymbolQualitySignals &S) { + OS << llvm::formatv("=== Symbol quality: {0}\n", S.evaluate()); + OS << llvm::formatv("\tReferences: {0}\n", S.References); + OS << llvm::formatv("\tDeprecated: {0}\n", S.Deprecated); + OS << llvm::formatv("\tReserved name: {0}\n", S.ReservedName); + OS << llvm::formatv("\tCategory: {0}\n", static_cast<int>(S.Category)); + return OS; +} + +static SymbolRelevanceSignals::AccessibleScope +computeScope(const NamedDecl *D) { + // Injected "Foo" within the class "Foo" has file scope, not class scope. + const DeclContext *DC = D->getDeclContext(); + if (auto *R = dyn_cast_or_null<RecordDecl>(D)) + if (R->isInjectedClassName()) + DC = DC->getParent(); + // Class constructor should have the same scope as the class. + if (isa<CXXConstructorDecl>(D)) + DC = DC->getParent(); + bool InClass = false; + for (; !DC->isFileContext(); DC = DC->getParent()) { + if (DC->isFunctionOrMethod()) + return SymbolRelevanceSignals::FunctionScope; + InClass = InClass || DC->isRecord(); + } + if (InClass) + return SymbolRelevanceSignals::ClassScope; + // ExternalLinkage threshold could be tweaked, e.g. module-visible as global. + // Avoid caching linkage if it may change after enclosing code completion. + if (hasUnstableLinkage(D) || D->getLinkageInternal() < ExternalLinkage) + return SymbolRelevanceSignals::FileScope; + return SymbolRelevanceSignals::GlobalScope; +} + +void SymbolRelevanceSignals::merge(const Symbol &IndexResult) { + SymbolURI = IndexResult.CanonicalDeclaration.FileURI; + SymbolScope = IndexResult.Scope; + IsInstanceMember |= isInstanceMember(IndexResult.SymInfo); + if (!(IndexResult.Flags & Symbol::VisibleOutsideFile)) { + Scope = AccessibleScope::FileScope; + } +} + +void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) { + if (SemaCCResult.Availability == CXAvailability_NotAvailable || + SemaCCResult.Availability == CXAvailability_NotAccessible) + Forbidden = true; + + if (SemaCCResult.Declaration) { + SemaSaysInScope = true; + // We boost things that have decls in the main file. We give a fixed score + // for all other declarations in sema as they are already included in the + // translation unit. + float DeclProximity = (hasDeclInMainFile(*SemaCCResult.Declaration) || + hasUsingDeclInMainFile(SemaCCResult)) + ? 1.0 + : 0.6; + SemaFileProximityScore = std::max(DeclProximity, SemaFileProximityScore); + IsInstanceMember |= isInstanceMember(SemaCCResult.Declaration); + InBaseClass |= SemaCCResult.InBaseClass; + } + + // Declarations are scoped, others (like macros) are assumed global. + if (SemaCCResult.Declaration) + Scope = std::min(Scope, computeScope(SemaCCResult.Declaration)); + + NeedsFixIts = !SemaCCResult.FixIts.empty(); +} + +static std::pair<float, unsigned> uriProximity(llvm::StringRef SymbolURI, + URIDistance *D) { + if (!D || SymbolURI.empty()) + return {0.f, 0u}; + unsigned Distance = D->distance(SymbolURI); + // Assume approximately default options are used for sensible scoring. + return {std::exp(Distance * -0.4f / FileDistanceOptions().UpCost), Distance}; +} + +static float scopeBoost(ScopeDistance &Distance, + llvm::Optional<llvm::StringRef> SymbolScope) { + if (!SymbolScope) + return 1; + auto D = Distance.distance(*SymbolScope); + if (D == FileDistance::Unreachable) + return 0.6f; + return std::max(0.65, 2.0 * std::pow(0.6, D / 2.0)); +} + +static llvm::Optional<llvm::StringRef> +wordMatching(llvm::StringRef Name, const llvm::StringSet<> *ContextWords) { + if (ContextWords) + for (const auto& Word : ContextWords->keys()) + if (Name.contains_lower(Word)) + return Word; + return llvm::None; +} + +float SymbolRelevanceSignals::evaluate() const { + float Score = 1; + + if (Forbidden) + return 0; + + Score *= NameMatch; + + // File proximity scores are [0,1] and we translate them into a multiplier in + // the range from 1 to 3. + Score *= 1 + 2 * std::max(uriProximity(SymbolURI, FileProximityMatch).first, + SemaFileProximityScore); + + if (ScopeProximityMatch) + // Use a constant scope boost for sema results, as scopes of sema results + // can be tricky (e.g. class/function scope). Set to the max boost as we + // don't load top-level symbols from the preamble and sema results are + // always in the accessible scope. + Score *= + SemaSaysInScope ? 2.0 : scopeBoost(*ScopeProximityMatch, SymbolScope); + + if (wordMatching(Name, ContextWords)) + Score *= 1.5; + + // Symbols like local variables may only be referenced within their scope. + // Conversely if we're in that scope, it's likely we'll reference them. + if (Query == CodeComplete) { + // The narrower the scope where a symbol is visible, the more likely it is + // to be relevant when it is available. + switch (Scope) { + case GlobalScope: + break; + case FileScope: + Score *= 1.5f; + break; + case ClassScope: + Score *= 2; + break; + case FunctionScope: + Score *= 4; + break; + } + } else { + // For non-completion queries, the wider the scope where a symbol is + // visible, the more likely it is to be relevant. + switch (Scope) { + case GlobalScope: + break; + case FileScope: + Score *= 0.5f; + break; + default: + // TODO: Handle other scopes as we start to use them for index results. + break; + } + } + + if (TypeMatchesPreferred) + Score *= 5.0; + + // Penalize non-instance members when they are accessed via a class instance. + if (!IsInstanceMember && + (Context == CodeCompletionContext::CCC_DotMemberAccess || + Context == CodeCompletionContext::CCC_ArrowMemberAccess)) { + Score *= 0.2f; + } + + if (InBaseClass) + Score *= 0.5f; + + // Penalize for FixIts. + if (NeedsFixIts) + Score *= 0.5f; + + return Score; +} + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const SymbolRelevanceSignals &S) { + OS << llvm::formatv("=== Symbol relevance: {0}\n", S.evaluate()); + OS << llvm::formatv("\tName: {0}\n", S.Name); + OS << llvm::formatv("\tName match: {0}\n", S.NameMatch); + if (S.ContextWords) + OS << llvm::formatv( + "\tMatching context word: {0}\n", + wordMatching(S.Name, S.ContextWords).getValueOr("<none>")); + OS << llvm::formatv("\tForbidden: {0}\n", S.Forbidden); + OS << llvm::formatv("\tNeedsFixIts: {0}\n", S.NeedsFixIts); + OS << llvm::formatv("\tIsInstanceMember: {0}\n", S.IsInstanceMember); + OS << llvm::formatv("\tContext: {0}\n", getCompletionKindString(S.Context)); + OS << llvm::formatv("\tQuery type: {0}\n", static_cast<int>(S.Query)); + OS << llvm::formatv("\tScope: {0}\n", static_cast<int>(S.Scope)); + + OS << llvm::formatv("\tSymbol URI: {0}\n", S.SymbolURI); + OS << llvm::formatv("\tSymbol scope: {0}\n", + S.SymbolScope ? *S.SymbolScope : "<None>"); + + if (S.FileProximityMatch) { + auto Score = uriProximity(S.SymbolURI, S.FileProximityMatch); + OS << llvm::formatv("\tIndex URI proximity: {0} (distance={1})\n", + Score.first, Score.second); + } + OS << llvm::formatv("\tSema file proximity: {0}\n", S.SemaFileProximityScore); + + OS << llvm::formatv("\tSema says in scope: {0}\n", S.SemaSaysInScope); + if (S.ScopeProximityMatch) + OS << llvm::formatv("\tIndex scope boost: {0}\n", + scopeBoost(*S.ScopeProximityMatch, S.SymbolScope)); + + OS << llvm::formatv( + "\tType matched preferred: {0} (Context type: {1}, Symbol type: {2}\n", + S.TypeMatchesPreferred, S.HadContextType, S.HadSymbolType); + + return OS; +} + +float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance) { + return SymbolQuality * SymbolRelevance; +} + +// Produces an integer that sorts in the same order as F. +// That is: a < b <==> encodeFloat(a) < encodeFloat(b). +static uint32_t encodeFloat(float F) { + static_assert(std::numeric_limits<float>::is_iec559, ""); + constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1); + + // Get the bits of the float. Endianness is the same as for integers. + uint32_t U = llvm::FloatToBits(F); + // IEEE 754 floats compare like sign-magnitude integers. + if (U & TopBit) // Negative float. + return 0 - U; // Map onto the low half of integers, order reversed. + return U + TopBit; // Positive floats map onto the high half of integers. +} + +std::string sortText(float Score, llvm::StringRef Name) { + // We convert -Score to an integer, and hex-encode for readability. + // Example: [0.5, "foo"] -> "41000000foo" + std::string S; + llvm::raw_string_ostream OS(S); + llvm::write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower, + /*Width=*/2 * sizeof(Score)); + OS << Name; + OS.flush(); + return S; +} + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const SignatureQualitySignals &S) { + OS << llvm::formatv("=== Signature Quality:\n"); + OS << llvm::formatv("\tNumber of parameters: {0}\n", S.NumberOfParameters); + OS << llvm::formatv("\tNumber of optional parameters: {0}\n", + S.NumberOfOptionalParameters); + OS << llvm::formatv("\tKind: {0}\n", S.Kind); + return OS; +} + +} // namespace clangd +} // namespace clang