150
|
1 //===--- Quality.h - Ranking alternatives for ambiguous queries --*- C++-*-===//
|
|
2 //
|
|
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
4 // See https://llvm.org/LICENSE.txt for license information.
|
|
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
6 //
|
|
7 //===----------------------------------------------------------------------===//
|
|
8 ///
|
|
9 /// Some operations such as code completion produce a set of candidates.
|
|
10 /// Usually the user can choose between them, but we should put the best options
|
|
11 /// at the top (they're easier to select, and more likely to be seen).
|
|
12 ///
|
|
13 /// This file defines building blocks for ranking candidates.
|
|
14 /// It's used by the features directly and also in the implementation of
|
|
15 /// indexes, as indexes also need to heuristically limit their results.
|
|
16 ///
|
|
17 /// The facilities here are:
|
|
18 /// - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString
|
|
19 /// These are structured in a way that they can be debugged, and are fairly
|
|
20 /// consistent regardless of the source.
|
|
21 /// - compute scores from scoring signals. These are suitable for sorting.
|
|
22 /// - sorting utilities like the TopN container.
|
|
23 /// These could be split up further to isolate dependencies if we care.
|
|
24 ///
|
|
25 //===----------------------------------------------------------------------===//
|
|
26
|
|
27 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
|
|
28 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
|
|
29
|
|
30 #include "FileDistance.h"
|
|
31 #include "clang/Sema/CodeCompleteConsumer.h"
|
|
32 #include "llvm/ADT/StringRef.h"
|
|
33 #include "llvm/ADT/StringSet.h"
|
|
34 #include <algorithm>
|
|
35 #include <functional>
|
|
36 #include <vector>
|
|
37
|
|
38 namespace llvm {
|
|
39 class raw_ostream;
|
|
40 } // namespace llvm
|
|
41
|
|
42 namespace clang {
|
|
43 class CodeCompletionResult;
|
|
44
|
|
45 namespace clangd {
|
236
|
46 struct ASTSignals;
|
150
|
47 struct Symbol;
|
|
48 class URIDistance;
|
|
49
|
|
50 // Signals structs are designed to be aggregated from 0 or more sources.
|
|
51 // A default instance has neutral signals, and sources are merged into it.
|
|
52 // They can be dumped for debugging, and evaluate()d into a score.
|
|
53
|
|
54 /// Attributes of a symbol that affect how much we like it.
|
|
55 struct SymbolQualitySignals {
|
|
56 bool Deprecated = false;
|
|
57 bool ReservedName = false; // __foo, _Foo are usually implementation details.
|
|
58 // FIXME: make these findable once user types _.
|
|
59 bool ImplementationDetail = false;
|
|
60 unsigned References = 0;
|
|
61
|
|
62 enum SymbolCategory {
|
|
63 Unknown = 0,
|
|
64 Variable,
|
|
65 Macro,
|
|
66 Type,
|
|
67 Function,
|
|
68 Constructor,
|
|
69 Destructor,
|
|
70 Namespace,
|
|
71 Keyword,
|
|
72 Operator,
|
|
73 } Category = Unknown;
|
|
74
|
|
75 void merge(const CodeCompletionResult &SemaCCResult);
|
|
76 void merge(const Symbol &IndexResult);
|
|
77
|
|
78 // Condense these signals down to a single number, higher is better.
|
221
|
79 float evaluateHeuristics() const;
|
150
|
80 };
|
|
81 llvm::raw_ostream &operator<<(llvm::raw_ostream &,
|
|
82 const SymbolQualitySignals &);
|
|
83
|
|
84 /// Attributes of a symbol-query pair that affect how much we like it.
|
|
85 struct SymbolRelevanceSignals {
|
|
86 /// The name of the symbol (for ContextWords). Must be explicitly assigned.
|
|
87 llvm::StringRef Name;
|
|
88 /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
|
|
89 float NameMatch = 1;
|
|
90 /// Lowercase words relevant to the context (e.g. near the completion point).
|
|
91 llvm::StringSet<>* ContextWords = nullptr;
|
|
92 bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private).
|
|
93 /// Whether fixits needs to be applied for that completion or not.
|
|
94 bool NeedsFixIts = false;
|
|
95 bool InBaseClass = false; // A member from base class of the accessed class.
|
|
96
|
|
97 URIDistance *FileProximityMatch = nullptr;
|
|
98 /// These are used to calculate proximity between the index symbol and the
|
|
99 /// query.
|
|
100 llvm::StringRef SymbolURI;
|
|
101 /// FIXME: unify with index proximity score - signals should be
|
|
102 /// source-independent.
|
|
103 /// Proximity between best declaration and the query. [0-1], 1 is closest.
|
|
104 float SemaFileProximityScore = 0;
|
|
105
|
|
106 // Scope proximity is only considered (both index and sema) when this is set.
|
|
107 ScopeDistance *ScopeProximityMatch = nullptr;
|
|
108 llvm::Optional<llvm::StringRef> SymbolScope;
|
|
109 // A symbol from sema should be accessible from the current scope.
|
|
110 bool SemaSaysInScope = false;
|
|
111
|
|
112 // An approximate measure of where we expect the symbol to be used.
|
|
113 enum AccessibleScope {
|
|
114 FunctionScope,
|
|
115 ClassScope,
|
|
116 FileScope,
|
|
117 GlobalScope,
|
|
118 } Scope = GlobalScope;
|
|
119
|
|
120 enum QueryType {
|
|
121 CodeComplete,
|
|
122 Generic,
|
|
123 } Query = Generic;
|
|
124
|
|
125 CodeCompletionContext::Kind Context = CodeCompletionContext::CCC_Other;
|
|
126
|
|
127 // Whether symbol is an instance member of a class.
|
|
128 bool IsInstanceMember = false;
|
|
129
|
|
130 // Whether clang provided a preferred type in the completion context.
|
|
131 bool HadContextType = false;
|
|
132 // Whether a source completion item or a symbol had a type information.
|
|
133 bool HadSymbolType = false;
|
|
134 // Whether the item matches the type expected in the completion context.
|
|
135 bool TypeMatchesPreferred = false;
|
|
136
|
221
|
137 /// Length of the unqualified partial name of Symbol typed in
|
|
138 /// CompletionPrefix.
|
|
139 unsigned FilterLength = 0;
|
|
140
|
|
141 const ASTSignals *MainFileSignals = nullptr;
|
|
142 /// Number of references to the candidate in the main file.
|
|
143 unsigned MainFileRefs = 0;
|
|
144 /// Number of unique symbols in the main file which belongs to candidate's
|
|
145 /// namespace. This indicates how relevant the namespace is in the current
|
|
146 /// file.
|
|
147 unsigned ScopeRefsInFile = 0;
|
|
148
|
|
149 /// Set of derived signals computed by calculateDerivedSignals(). Must not be
|
|
150 /// set explicitly.
|
|
151 struct DerivedSignals {
|
|
152 /// Whether Name contains some word from context.
|
|
153 bool NameMatchesContext = false;
|
|
154 /// Min distance between SymbolURI and all the headers included by the TU.
|
|
155 unsigned FileProximityDistance = FileDistance::Unreachable;
|
|
156 /// Min distance between SymbolScope and all the available scopes.
|
|
157 unsigned ScopeProximityDistance = FileDistance::Unreachable;
|
|
158 };
|
|
159
|
|
160 DerivedSignals calculateDerivedSignals() const;
|
|
161
|
150
|
162 void merge(const CodeCompletionResult &SemaResult);
|
|
163 void merge(const Symbol &IndexResult);
|
221
|
164 void computeASTSignals(const CodeCompletionResult &SemaResult);
|
150
|
165
|
|
166 // Condense these signals down to a single number, higher is better.
|
221
|
167 float evaluateHeuristics() const;
|
150
|
168 };
|
|
169 llvm::raw_ostream &operator<<(llvm::raw_ostream &,
|
|
170 const SymbolRelevanceSignals &);
|
|
171
|
|
172 /// Combine symbol quality and relevance into a single score.
|
|
173 float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance);
|
|
174
|
221
|
175 /// Same semantics as CodeComplete::Score. Quality score and Relevance score
|
|
176 /// have been removed since DecisionForest cannot assign individual scores to
|
|
177 /// Quality and Relevance signals.
|
|
178 struct DecisionForestScores {
|
|
179 float Total = 0.f;
|
|
180 float ExcludingName = 0.f;
|
|
181 };
|
|
182
|
|
183 DecisionForestScores
|
|
184 evaluateDecisionForest(const SymbolQualitySignals &Quality,
|
|
185 const SymbolRelevanceSignals &Relevance, float Base);
|
|
186
|
150
|
187 /// TopN<T> is a lossy container that preserves only the "best" N elements.
|
|
188 template <typename T, typename Compare = std::greater<T>> class TopN {
|
|
189 public:
|
|
190 using value_type = T;
|
|
191 TopN(size_t N, Compare Greater = Compare())
|
|
192 : N(N), Greater(std::move(Greater)) {}
|
|
193
|
|
194 // Adds a candidate to the set.
|
|
195 // Returns true if a candidate was dropped to get back under N.
|
|
196 bool push(value_type &&V) {
|
|
197 bool Dropped = false;
|
|
198 if (Heap.size() >= N) {
|
|
199 Dropped = true;
|
|
200 if (N > 0 && Greater(V, Heap.front())) {
|
|
201 std::pop_heap(Heap.begin(), Heap.end(), Greater);
|
|
202 Heap.back() = std::move(V);
|
|
203 std::push_heap(Heap.begin(), Heap.end(), Greater);
|
|
204 }
|
|
205 } else {
|
|
206 Heap.push_back(std::move(V));
|
|
207 std::push_heap(Heap.begin(), Heap.end(), Greater);
|
|
208 }
|
|
209 assert(Heap.size() <= N);
|
|
210 assert(std::is_heap(Heap.begin(), Heap.end(), Greater));
|
|
211 return Dropped;
|
|
212 }
|
|
213
|
|
214 // Returns candidates from best to worst.
|
|
215 std::vector<value_type> items() && {
|
|
216 std::sort_heap(Heap.begin(), Heap.end(), Greater);
|
|
217 assert(Heap.size() <= N);
|
|
218 return std::move(Heap);
|
|
219 }
|
|
220
|
|
221 private:
|
|
222 const size_t N;
|
|
223 std::vector<value_type> Heap; // Min-heap, comparator is Greater.
|
|
224 Compare Greater;
|
|
225 };
|
|
226
|
|
227 /// Returns a string that sorts in the same order as (-Score, Tiebreak), for
|
|
228 /// LSP. (The highest score compares smallest so it sorts at the top).
|
|
229 std::string sortText(float Score, llvm::StringRef Tiebreak = "");
|
|
230
|
|
231 struct SignatureQualitySignals {
|
|
232 uint32_t NumberOfParameters = 0;
|
|
233 uint32_t NumberOfOptionalParameters = 0;
|
|
234 CodeCompleteConsumer::OverloadCandidate::CandidateKind Kind =
|
|
235 CodeCompleteConsumer::OverloadCandidate::CandidateKind::CK_Function;
|
|
236 };
|
|
237 llvm::raw_ostream &operator<<(llvm::raw_ostream &,
|
|
238 const SignatureQualitySignals &);
|
|
239
|
|
240 } // namespace clangd
|
|
241 } // namespace clang
|
|
242
|
|
243 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
|