150
|
1 //===--- Quality.cpp ---------------------------------------------*- C++-*-===//
|
|
2 //
|
|
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
4 // See https://llvm.org/LICENSE.txt for license information.
|
|
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
6 //
|
|
7 //===----------------------------------------------------------------------===//
|
|
8
|
|
9 #include "Quality.h"
|
|
10 #include "AST.h"
|
221
|
11 #include "CompletionModel.h"
|
150
|
12 #include "FileDistance.h"
|
|
13 #include "SourceCode.h"
|
|
14 #include "URI.h"
|
|
15 #include "index/Symbol.h"
|
|
16 #include "clang/AST/ASTContext.h"
|
|
17 #include "clang/AST/Decl.h"
|
|
18 #include "clang/AST/DeclCXX.h"
|
|
19 #include "clang/AST/DeclTemplate.h"
|
|
20 #include "clang/AST/DeclVisitor.h"
|
|
21 #include "clang/Basic/CharInfo.h"
|
|
22 #include "clang/Basic/SourceManager.h"
|
|
23 #include "clang/Sema/CodeCompleteConsumer.h"
|
|
24 #include "llvm/ADT/ArrayRef.h"
|
|
25 #include "llvm/ADT/SmallString.h"
|
|
26 #include "llvm/ADT/SmallVector.h"
|
|
27 #include "llvm/ADT/StringExtras.h"
|
|
28 #include "llvm/ADT/StringRef.h"
|
|
29 #include "llvm/Support/Casting.h"
|
|
30 #include "llvm/Support/FormatVariadic.h"
|
|
31 #include "llvm/Support/MathExtras.h"
|
|
32 #include "llvm/Support/raw_ostream.h"
|
|
33 #include <algorithm>
|
|
34 #include <cmath>
|
|
35
|
|
36 namespace clang {
|
|
37 namespace clangd {
|
|
38 static bool isReserved(llvm::StringRef Name) {
|
|
39 // FIXME: Should we exclude _Bool and others recognized by the standard?
|
|
40 return Name.size() >= 2 && Name[0] == '_' &&
|
|
41 (isUppercase(Name[1]) || Name[1] == '_');
|
|
42 }
|
|
43
|
|
44 static bool hasDeclInMainFile(const Decl &D) {
|
|
45 auto &SourceMgr = D.getASTContext().getSourceManager();
|
|
46 for (auto *Redecl : D.redecls()) {
|
|
47 if (isInsideMainFile(Redecl->getLocation(), SourceMgr))
|
|
48 return true;
|
|
49 }
|
|
50 return false;
|
|
51 }
|
|
52
|
|
53 static bool hasUsingDeclInMainFile(const CodeCompletionResult &R) {
|
|
54 const auto &Context = R.Declaration->getASTContext();
|
|
55 const auto &SourceMgr = Context.getSourceManager();
|
|
56 if (R.ShadowDecl) {
|
|
57 if (isInsideMainFile(R.ShadowDecl->getLocation(), SourceMgr))
|
|
58 return true;
|
|
59 }
|
|
60 return false;
|
|
61 }
|
|
62
|
|
63 static SymbolQualitySignals::SymbolCategory categorize(const NamedDecl &ND) {
|
|
64 if (const auto *FD = dyn_cast<FunctionDecl>(&ND)) {
|
|
65 if (FD->isOverloadedOperator())
|
|
66 return SymbolQualitySignals::Operator;
|
|
67 }
|
|
68 class Switch
|
|
69 : public ConstDeclVisitor<Switch, SymbolQualitySignals::SymbolCategory> {
|
|
70 public:
|
|
71 #define MAP(DeclType, Category) \
|
|
72 SymbolQualitySignals::SymbolCategory Visit##DeclType(const DeclType *) { \
|
|
73 return SymbolQualitySignals::Category; \
|
|
74 }
|
|
75 MAP(NamespaceDecl, Namespace);
|
|
76 MAP(NamespaceAliasDecl, Namespace);
|
|
77 MAP(TypeDecl, Type);
|
|
78 MAP(TypeAliasTemplateDecl, Type);
|
|
79 MAP(ClassTemplateDecl, Type);
|
|
80 MAP(CXXConstructorDecl, Constructor);
|
|
81 MAP(CXXDestructorDecl, Destructor);
|
|
82 MAP(ValueDecl, Variable);
|
|
83 MAP(VarTemplateDecl, Variable);
|
|
84 MAP(FunctionDecl, Function);
|
|
85 MAP(FunctionTemplateDecl, Function);
|
|
86 MAP(Decl, Unknown);
|
|
87 #undef MAP
|
|
88 };
|
|
89 return Switch().Visit(&ND);
|
|
90 }
|
|
91
|
|
92 static SymbolQualitySignals::SymbolCategory
|
|
93 categorize(const CodeCompletionResult &R) {
|
|
94 if (R.Declaration)
|
|
95 return categorize(*R.Declaration);
|
|
96 if (R.Kind == CodeCompletionResult::RK_Macro)
|
|
97 return SymbolQualitySignals::Macro;
|
|
98 // Everything else is a keyword or a pattern. Patterns are mostly keywords
|
|
99 // too, except a few which we recognize by cursor kind.
|
|
100 switch (R.CursorKind) {
|
|
101 case CXCursor_CXXMethod:
|
|
102 return SymbolQualitySignals::Function;
|
|
103 case CXCursor_ModuleImportDecl:
|
|
104 return SymbolQualitySignals::Namespace;
|
|
105 case CXCursor_MacroDefinition:
|
|
106 return SymbolQualitySignals::Macro;
|
|
107 case CXCursor_TypeRef:
|
|
108 return SymbolQualitySignals::Type;
|
|
109 case CXCursor_MemberRef:
|
|
110 return SymbolQualitySignals::Variable;
|
|
111 case CXCursor_Constructor:
|
|
112 return SymbolQualitySignals::Constructor;
|
|
113 default:
|
|
114 return SymbolQualitySignals::Keyword;
|
|
115 }
|
|
116 }
|
|
117
|
|
118 static SymbolQualitySignals::SymbolCategory
|
|
119 categorize(const index::SymbolInfo &D) {
|
|
120 switch (D.Kind) {
|
|
121 case index::SymbolKind::Namespace:
|
|
122 case index::SymbolKind::NamespaceAlias:
|
|
123 return SymbolQualitySignals::Namespace;
|
|
124 case index::SymbolKind::Macro:
|
|
125 return SymbolQualitySignals::Macro;
|
|
126 case index::SymbolKind::Enum:
|
|
127 case index::SymbolKind::Struct:
|
|
128 case index::SymbolKind::Class:
|
|
129 case index::SymbolKind::Protocol:
|
|
130 case index::SymbolKind::Extension:
|
|
131 case index::SymbolKind::Union:
|
|
132 case index::SymbolKind::TypeAlias:
|
173
|
133 case index::SymbolKind::TemplateTypeParm:
|
|
134 case index::SymbolKind::TemplateTemplateParm:
|
150
|
135 return SymbolQualitySignals::Type;
|
|
136 case index::SymbolKind::Function:
|
|
137 case index::SymbolKind::ClassMethod:
|
|
138 case index::SymbolKind::InstanceMethod:
|
|
139 case index::SymbolKind::StaticMethod:
|
|
140 case index::SymbolKind::InstanceProperty:
|
|
141 case index::SymbolKind::ClassProperty:
|
|
142 case index::SymbolKind::StaticProperty:
|
|
143 case index::SymbolKind::ConversionFunction:
|
|
144 return SymbolQualitySignals::Function;
|
|
145 case index::SymbolKind::Destructor:
|
|
146 return SymbolQualitySignals::Destructor;
|
|
147 case index::SymbolKind::Constructor:
|
|
148 return SymbolQualitySignals::Constructor;
|
|
149 case index::SymbolKind::Variable:
|
|
150 case index::SymbolKind::Field:
|
|
151 case index::SymbolKind::EnumConstant:
|
|
152 case index::SymbolKind::Parameter:
|
173
|
153 case index::SymbolKind::NonTypeTemplateParm:
|
150
|
154 return SymbolQualitySignals::Variable;
|
|
155 case index::SymbolKind::Using:
|
|
156 case index::SymbolKind::Module:
|
|
157 case index::SymbolKind::Unknown:
|
|
158 return SymbolQualitySignals::Unknown;
|
|
159 }
|
|
160 llvm_unreachable("Unknown index::SymbolKind");
|
|
161 }
|
|
162
|
|
163 static bool isInstanceMember(const NamedDecl *ND) {
|
|
164 if (!ND)
|
|
165 return false;
|
|
166 if (const auto *TP = dyn_cast<FunctionTemplateDecl>(ND))
|
|
167 ND = TP->TemplateDecl::getTemplatedDecl();
|
|
168 if (const auto *CM = dyn_cast<CXXMethodDecl>(ND))
|
|
169 return !CM->isStatic();
|
|
170 return isa<FieldDecl>(ND); // Note that static fields are VarDecl.
|
|
171 }
|
|
172
|
|
173 static bool isInstanceMember(const index::SymbolInfo &D) {
|
|
174 switch (D.Kind) {
|
|
175 case index::SymbolKind::InstanceMethod:
|
|
176 case index::SymbolKind::InstanceProperty:
|
|
177 case index::SymbolKind::Field:
|
|
178 return true;
|
|
179 default:
|
|
180 return false;
|
|
181 }
|
|
182 }
|
|
183
|
|
184 void SymbolQualitySignals::merge(const CodeCompletionResult &SemaCCResult) {
|
|
185 Deprecated |= (SemaCCResult.Availability == CXAvailability_Deprecated);
|
|
186 Category = categorize(SemaCCResult);
|
|
187
|
|
188 if (SemaCCResult.Declaration) {
|
|
189 ImplementationDetail |= isImplementationDetail(SemaCCResult.Declaration);
|
|
190 if (auto *ID = SemaCCResult.Declaration->getIdentifier())
|
|
191 ReservedName = ReservedName || isReserved(ID->getName());
|
|
192 } else if (SemaCCResult.Kind == CodeCompletionResult::RK_Macro)
|
|
193 ReservedName = ReservedName || isReserved(SemaCCResult.Macro->getName());
|
|
194 }
|
|
195
|
|
196 void SymbolQualitySignals::merge(const Symbol &IndexResult) {
|
|
197 Deprecated |= (IndexResult.Flags & Symbol::Deprecated);
|
|
198 ImplementationDetail |= (IndexResult.Flags & Symbol::ImplementationDetail);
|
|
199 References = std::max(IndexResult.References, References);
|
|
200 Category = categorize(IndexResult.SymInfo);
|
|
201 ReservedName = ReservedName || isReserved(IndexResult.Name);
|
|
202 }
|
|
203
|
221
|
204 float SymbolQualitySignals::evaluateHeuristics() const {
|
150
|
205 float Score = 1;
|
|
206
|
|
207 // This avoids a sharp gradient for tail symbols, and also neatly avoids the
|
|
208 // question of whether 0 references means a bad symbol or missing data.
|
|
209 if (References >= 10) {
|
|
210 // Use a sigmoid style boosting function, which flats out nicely for large
|
173
|
211 // numbers (e.g. 2.58 for 1M references).
|
150
|
212 // The following boosting function is equivalent to:
|
|
213 // m = 0.06
|
|
214 // f = 12.0
|
|
215 // boost = f * sigmoid(m * std::log(References)) - 0.5 * f + 0.59
|
|
216 // Sample data points: (10, 1.00), (100, 1.41), (1000, 1.82),
|
|
217 // (10K, 2.21), (100K, 2.58), (1M, 2.94)
|
|
218 float S = std::pow(References, -0.06);
|
|
219 Score *= 6.0 * (1 - S) / (1 + S) + 0.59;
|
|
220 }
|
|
221
|
|
222 if (Deprecated)
|
|
223 Score *= 0.1f;
|
|
224 if (ReservedName)
|
|
225 Score *= 0.1f;
|
|
226 if (ImplementationDetail)
|
|
227 Score *= 0.2f;
|
|
228
|
|
229 switch (Category) {
|
|
230 case Keyword: // Often relevant, but misses most signals.
|
|
231 Score *= 4; // FIXME: important keywords should have specific boosts.
|
|
232 break;
|
|
233 case Type:
|
|
234 case Function:
|
|
235 case Variable:
|
|
236 Score *= 1.1f;
|
|
237 break;
|
|
238 case Namespace:
|
|
239 Score *= 0.8f;
|
|
240 break;
|
|
241 case Macro:
|
|
242 case Destructor:
|
|
243 case Operator:
|
|
244 Score *= 0.5f;
|
|
245 break;
|
|
246 case Constructor: // No boost constructors so they are after class types.
|
|
247 case Unknown:
|
|
248 break;
|
|
249 }
|
|
250
|
|
251 return Score;
|
|
252 }
|
|
253
|
|
254 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
|
|
255 const SymbolQualitySignals &S) {
|
221
|
256 OS << llvm::formatv("=== Symbol quality: {0}\n", S.evaluateHeuristics());
|
150
|
257 OS << llvm::formatv("\tReferences: {0}\n", S.References);
|
|
258 OS << llvm::formatv("\tDeprecated: {0}\n", S.Deprecated);
|
|
259 OS << llvm::formatv("\tReserved name: {0}\n", S.ReservedName);
|
221
|
260 OS << llvm::formatv("\tImplementation detail: {0}\n", S.ImplementationDetail);
|
150
|
261 OS << llvm::formatv("\tCategory: {0}\n", static_cast<int>(S.Category));
|
|
262 return OS;
|
|
263 }
|
|
264
|
|
265 static SymbolRelevanceSignals::AccessibleScope
|
|
266 computeScope(const NamedDecl *D) {
|
|
267 // Injected "Foo" within the class "Foo" has file scope, not class scope.
|
|
268 const DeclContext *DC = D->getDeclContext();
|
|
269 if (auto *R = dyn_cast_or_null<RecordDecl>(D))
|
|
270 if (R->isInjectedClassName())
|
|
271 DC = DC->getParent();
|
|
272 // Class constructor should have the same scope as the class.
|
|
273 if (isa<CXXConstructorDecl>(D))
|
|
274 DC = DC->getParent();
|
|
275 bool InClass = false;
|
|
276 for (; !DC->isFileContext(); DC = DC->getParent()) {
|
|
277 if (DC->isFunctionOrMethod())
|
|
278 return SymbolRelevanceSignals::FunctionScope;
|
|
279 InClass = InClass || DC->isRecord();
|
|
280 }
|
|
281 if (InClass)
|
|
282 return SymbolRelevanceSignals::ClassScope;
|
|
283 // ExternalLinkage threshold could be tweaked, e.g. module-visible as global.
|
|
284 // Avoid caching linkage if it may change after enclosing code completion.
|
|
285 if (hasUnstableLinkage(D) || D->getLinkageInternal() < ExternalLinkage)
|
|
286 return SymbolRelevanceSignals::FileScope;
|
|
287 return SymbolRelevanceSignals::GlobalScope;
|
|
288 }
|
|
289
|
|
290 void SymbolRelevanceSignals::merge(const Symbol &IndexResult) {
|
|
291 SymbolURI = IndexResult.CanonicalDeclaration.FileURI;
|
|
292 SymbolScope = IndexResult.Scope;
|
|
293 IsInstanceMember |= isInstanceMember(IndexResult.SymInfo);
|
|
294 if (!(IndexResult.Flags & Symbol::VisibleOutsideFile)) {
|
|
295 Scope = AccessibleScope::FileScope;
|
|
296 }
|
221
|
297 if (MainFileSignals) {
|
|
298 MainFileRefs =
|
|
299 std::max(MainFileRefs,
|
|
300 MainFileSignals->ReferencedSymbols.lookup(IndexResult.ID));
|
|
301 ScopeRefsInFile =
|
|
302 std::max(ScopeRefsInFile,
|
|
303 MainFileSignals->RelatedNamespaces.lookup(IndexResult.Scope));
|
|
304 }
|
|
305 }
|
|
306
|
|
307 void SymbolRelevanceSignals::computeASTSignals(
|
|
308 const CodeCompletionResult &SemaResult) {
|
|
309 if (!MainFileSignals)
|
|
310 return;
|
|
311 if ((SemaResult.Kind != CodeCompletionResult::RK_Declaration) &&
|
|
312 (SemaResult.Kind != CodeCompletionResult::RK_Pattern))
|
|
313 return;
|
|
314 if (const NamedDecl *ND = SemaResult.getDeclaration()) {
|
|
315 auto ID = getSymbolID(ND);
|
|
316 if (!ID)
|
|
317 return;
|
|
318 MainFileRefs =
|
|
319 std::max(MainFileRefs, MainFileSignals->ReferencedSymbols.lookup(ID));
|
|
320 if (const auto *NSD = dyn_cast<NamespaceDecl>(ND->getDeclContext())) {
|
|
321 if (NSD->isAnonymousNamespace())
|
|
322 return;
|
|
323 std::string Scope = printNamespaceScope(*NSD);
|
|
324 if (!Scope.empty())
|
|
325 ScopeRefsInFile = std::max(
|
|
326 ScopeRefsInFile, MainFileSignals->RelatedNamespaces.lookup(Scope));
|
|
327 }
|
|
328 }
|
150
|
329 }
|
|
330
|
|
331 void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) {
|
|
332 if (SemaCCResult.Availability == CXAvailability_NotAvailable ||
|
|
333 SemaCCResult.Availability == CXAvailability_NotAccessible)
|
|
334 Forbidden = true;
|
|
335
|
|
336 if (SemaCCResult.Declaration) {
|
|
337 SemaSaysInScope = true;
|
|
338 // We boost things that have decls in the main file. We give a fixed score
|
|
339 // for all other declarations in sema as they are already included in the
|
|
340 // translation unit.
|
|
341 float DeclProximity = (hasDeclInMainFile(*SemaCCResult.Declaration) ||
|
|
342 hasUsingDeclInMainFile(SemaCCResult))
|
|
343 ? 1.0
|
|
344 : 0.6;
|
|
345 SemaFileProximityScore = std::max(DeclProximity, SemaFileProximityScore);
|
|
346 IsInstanceMember |= isInstanceMember(SemaCCResult.Declaration);
|
|
347 InBaseClass |= SemaCCResult.InBaseClass;
|
|
348 }
|
|
349
|
221
|
350 computeASTSignals(SemaCCResult);
|
150
|
351 // Declarations are scoped, others (like macros) are assumed global.
|
|
352 if (SemaCCResult.Declaration)
|
|
353 Scope = std::min(Scope, computeScope(SemaCCResult.Declaration));
|
|
354
|
|
355 NeedsFixIts = !SemaCCResult.FixIts.empty();
|
|
356 }
|
|
357
|
221
|
358 static float fileProximityScore(unsigned FileDistance) {
|
|
359 // Range: [0, 1]
|
|
360 // FileDistance = [0, 1, 2, 3, 4, .., FileDistance::Unreachable]
|
|
361 // Score = [1, 0.82, 0.67, 0.55, 0.45, .., 0]
|
|
362 if (FileDistance == FileDistance::Unreachable)
|
|
363 return 0;
|
150
|
364 // Assume approximately default options are used for sensible scoring.
|
221
|
365 return std::exp(FileDistance * -0.4f / FileDistanceOptions().UpCost);
|
150
|
366 }
|
|
367
|
221
|
368 static float scopeProximityScore(unsigned ScopeDistance) {
|
|
369 // Range: [0.6, 2].
|
|
370 // ScopeDistance = [0, 1, 2, 3, 4, 5, 6, 7, .., FileDistance::Unreachable]
|
|
371 // Score = [2.0, 1.55, 1.2, 0.93, 0.72, 0.65, 0.65, 0.65, .., 0.6]
|
|
372 if (ScopeDistance == FileDistance::Unreachable)
|
150
|
373 return 0.6f;
|
221
|
374 return std::max(0.65, 2.0 * std::pow(0.6, ScopeDistance / 2.0));
|
150
|
375 }
|
|
376
|
|
377 static llvm::Optional<llvm::StringRef>
|
|
378 wordMatching(llvm::StringRef Name, const llvm::StringSet<> *ContextWords) {
|
|
379 if (ContextWords)
|
221
|
380 for (const auto &Word : ContextWords->keys())
|
150
|
381 if (Name.contains_lower(Word))
|
|
382 return Word;
|
|
383 return llvm::None;
|
|
384 }
|
|
385
|
221
|
386 SymbolRelevanceSignals::DerivedSignals
|
|
387 SymbolRelevanceSignals::calculateDerivedSignals() const {
|
|
388 DerivedSignals Derived;
|
|
389 Derived.NameMatchesContext = wordMatching(Name, ContextWords).hasValue();
|
|
390 Derived.FileProximityDistance = !FileProximityMatch || SymbolURI.empty()
|
|
391 ? FileDistance::Unreachable
|
|
392 : FileProximityMatch->distance(SymbolURI);
|
|
393 if (ScopeProximityMatch) {
|
|
394 // For global symbol, the distance is 0.
|
|
395 Derived.ScopeProximityDistance =
|
|
396 SymbolScope ? ScopeProximityMatch->distance(*SymbolScope) : 0;
|
|
397 }
|
|
398 return Derived;
|
|
399 }
|
|
400
|
|
401 float SymbolRelevanceSignals::evaluateHeuristics() const {
|
|
402 DerivedSignals Derived = calculateDerivedSignals();
|
150
|
403 float Score = 1;
|
|
404
|
|
405 if (Forbidden)
|
|
406 return 0;
|
|
407
|
|
408 Score *= NameMatch;
|
|
409
|
|
410 // File proximity scores are [0,1] and we translate them into a multiplier in
|
|
411 // the range from 1 to 3.
|
221
|
412 Score *= 1 + 2 * std::max(fileProximityScore(Derived.FileProximityDistance),
|
150
|
413 SemaFileProximityScore);
|
|
414
|
|
415 if (ScopeProximityMatch)
|
|
416 // Use a constant scope boost for sema results, as scopes of sema results
|
|
417 // can be tricky (e.g. class/function scope). Set to the max boost as we
|
|
418 // don't load top-level symbols from the preamble and sema results are
|
|
419 // always in the accessible scope.
|
221
|
420 Score *= SemaSaysInScope
|
|
421 ? 2.0
|
|
422 : scopeProximityScore(Derived.ScopeProximityDistance);
|
150
|
423
|
221
|
424 if (Derived.NameMatchesContext)
|
150
|
425 Score *= 1.5;
|
|
426
|
|
427 // Symbols like local variables may only be referenced within their scope.
|
|
428 // Conversely if we're in that scope, it's likely we'll reference them.
|
|
429 if (Query == CodeComplete) {
|
|
430 // The narrower the scope where a symbol is visible, the more likely it is
|
|
431 // to be relevant when it is available.
|
|
432 switch (Scope) {
|
|
433 case GlobalScope:
|
|
434 break;
|
|
435 case FileScope:
|
|
436 Score *= 1.5f;
|
|
437 break;
|
|
438 case ClassScope:
|
|
439 Score *= 2;
|
|
440 break;
|
|
441 case FunctionScope:
|
|
442 Score *= 4;
|
|
443 break;
|
|
444 }
|
|
445 } else {
|
|
446 // For non-completion queries, the wider the scope where a symbol is
|
|
447 // visible, the more likely it is to be relevant.
|
|
448 switch (Scope) {
|
|
449 case GlobalScope:
|
|
450 break;
|
|
451 case FileScope:
|
|
452 Score *= 0.5f;
|
|
453 break;
|
|
454 default:
|
|
455 // TODO: Handle other scopes as we start to use them for index results.
|
|
456 break;
|
|
457 }
|
|
458 }
|
|
459
|
|
460 if (TypeMatchesPreferred)
|
|
461 Score *= 5.0;
|
|
462
|
|
463 // Penalize non-instance members when they are accessed via a class instance.
|
|
464 if (!IsInstanceMember &&
|
|
465 (Context == CodeCompletionContext::CCC_DotMemberAccess ||
|
|
466 Context == CodeCompletionContext::CCC_ArrowMemberAccess)) {
|
|
467 Score *= 0.2f;
|
|
468 }
|
|
469
|
|
470 if (InBaseClass)
|
|
471 Score *= 0.5f;
|
|
472
|
|
473 // Penalize for FixIts.
|
|
474 if (NeedsFixIts)
|
|
475 Score *= 0.5f;
|
|
476
|
221
|
477 // Use a sigmoid style boosting function similar to `References`, which flats
|
|
478 // out nicely for large values. This avoids a sharp gradient for heavily
|
|
479 // referenced symbols. Use smaller gradient for ScopeRefsInFile since ideally
|
|
480 // MainFileRefs <= ScopeRefsInFile.
|
|
481 if (MainFileRefs >= 2) {
|
|
482 // E.g.: (2, 1.12), (9, 2.0), (48, 3.0).
|
|
483 float S = std::pow(MainFileRefs, -0.11);
|
|
484 Score *= 11.0 * (1 - S) / (1 + S) + 0.7;
|
|
485 }
|
|
486 if (ScopeRefsInFile >= 2) {
|
|
487 // E.g.: (2, 1.04), (14, 2.0), (109, 3.0), (400, 3.6).
|
|
488 float S = std::pow(ScopeRefsInFile, -0.10);
|
|
489 Score *= 10.0 * (1 - S) / (1 + S) + 0.7;
|
|
490 }
|
|
491
|
150
|
492 return Score;
|
|
493 }
|
|
494
|
|
495 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
|
|
496 const SymbolRelevanceSignals &S) {
|
221
|
497 OS << llvm::formatv("=== Symbol relevance: {0}\n", S.evaluateHeuristics());
|
150
|
498 OS << llvm::formatv("\tName: {0}\n", S.Name);
|
|
499 OS << llvm::formatv("\tName match: {0}\n", S.NameMatch);
|
|
500 if (S.ContextWords)
|
|
501 OS << llvm::formatv(
|
|
502 "\tMatching context word: {0}\n",
|
|
503 wordMatching(S.Name, S.ContextWords).getValueOr("<none>"));
|
|
504 OS << llvm::formatv("\tForbidden: {0}\n", S.Forbidden);
|
|
505 OS << llvm::formatv("\tNeedsFixIts: {0}\n", S.NeedsFixIts);
|
|
506 OS << llvm::formatv("\tIsInstanceMember: {0}\n", S.IsInstanceMember);
|
221
|
507 OS << llvm::formatv("\tInBaseClass: {0}\n", S.InBaseClass);
|
150
|
508 OS << llvm::formatv("\tContext: {0}\n", getCompletionKindString(S.Context));
|
|
509 OS << llvm::formatv("\tQuery type: {0}\n", static_cast<int>(S.Query));
|
|
510 OS << llvm::formatv("\tScope: {0}\n", static_cast<int>(S.Scope));
|
|
511
|
|
512 OS << llvm::formatv("\tSymbol URI: {0}\n", S.SymbolURI);
|
|
513 OS << llvm::formatv("\tSymbol scope: {0}\n",
|
|
514 S.SymbolScope ? *S.SymbolScope : "<None>");
|
|
515
|
221
|
516 SymbolRelevanceSignals::DerivedSignals Derived = S.calculateDerivedSignals();
|
150
|
517 if (S.FileProximityMatch) {
|
221
|
518 unsigned Score = fileProximityScore(Derived.FileProximityDistance);
|
|
519 OS << llvm::formatv("\tIndex URI proximity: {0} (distance={1})\n", Score,
|
|
520 Derived.FileProximityDistance);
|
150
|
521 }
|
|
522 OS << llvm::formatv("\tSema file proximity: {0}\n", S.SemaFileProximityScore);
|
|
523
|
|
524 OS << llvm::formatv("\tSema says in scope: {0}\n", S.SemaSaysInScope);
|
|
525 if (S.ScopeProximityMatch)
|
|
526 OS << llvm::formatv("\tIndex scope boost: {0}\n",
|
221
|
527 scopeProximityScore(Derived.ScopeProximityDistance));
|
150
|
528
|
|
529 OS << llvm::formatv(
|
|
530 "\tType matched preferred: {0} (Context type: {1}, Symbol type: {2}\n",
|
|
531 S.TypeMatchesPreferred, S.HadContextType, S.HadSymbolType);
|
|
532
|
|
533 return OS;
|
|
534 }
|
|
535
|
|
/// Combines a symbol quality score and a relevance score into one score by
/// multiplying them.
float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance) {
  const float Combined = SymbolQuality * SymbolRelevance;
  return Combined;
}
|
|
539
|
221
|
540 DecisionForestScores
|
|
541 evaluateDecisionForest(const SymbolQualitySignals &Quality,
|
|
542 const SymbolRelevanceSignals &Relevance, float Base) {
|
|
543 Example E;
|
|
544 E.setIsDeprecated(Quality.Deprecated);
|
|
545 E.setIsReservedName(Quality.ReservedName);
|
|
546 E.setIsImplementationDetail(Quality.ImplementationDetail);
|
|
547 E.setNumReferences(Quality.References);
|
|
548 E.setSymbolCategory(Quality.Category);
|
|
549
|
|
550 SymbolRelevanceSignals::DerivedSignals Derived =
|
|
551 Relevance.calculateDerivedSignals();
|
|
552 int NumMatch = 0;
|
|
553 if (Relevance.ContextWords) {
|
|
554 for (const auto &Word : Relevance.ContextWords->keys()) {
|
|
555 if (Relevance.Name.contains_lower(Word)) {
|
|
556 ++NumMatch;
|
|
557 }
|
|
558 }
|
|
559 }
|
|
560 E.setIsNameInContext(NumMatch > 0);
|
|
561 E.setNumNameInContext(NumMatch);
|
|
562 E.setFractionNameInContext(
|
|
563 Relevance.ContextWords && !Relevance.ContextWords->empty()
|
|
564 ? NumMatch * 1.0 / Relevance.ContextWords->size()
|
|
565 : 0);
|
|
566 E.setIsInBaseClass(Relevance.InBaseClass);
|
|
567 E.setFileProximityDistanceCost(Derived.FileProximityDistance);
|
|
568 E.setSemaFileProximityScore(Relevance.SemaFileProximityScore);
|
|
569 E.setSymbolScopeDistanceCost(Derived.ScopeProximityDistance);
|
|
570 E.setSemaSaysInScope(Relevance.SemaSaysInScope);
|
|
571 E.setScope(Relevance.Scope);
|
|
572 E.setContextKind(Relevance.Context);
|
|
573 E.setIsInstanceMember(Relevance.IsInstanceMember);
|
|
574 E.setHadContextType(Relevance.HadContextType);
|
|
575 E.setHadSymbolType(Relevance.HadSymbolType);
|
|
576 E.setTypeMatchesPreferred(Relevance.TypeMatchesPreferred);
|
|
577
|
|
578 DecisionForestScores Scores;
|
|
579 // Exponentiating DecisionForest prediction makes the score of each tree a
|
|
580 // multiplciative boost (like NameMatch). This allows us to weigh the
|
|
581 // prediciton score and NameMatch appropriately.
|
|
582 Scores.ExcludingName = pow(Base, Evaluate(E));
|
|
583 // Following cases are not part of the generated training dataset:
|
|
584 // - Symbols with `NeedsFixIts`.
|
|
585 // - Forbidden symbols.
|
|
586 // - Keywords: Dataset contains only macros and decls.
|
|
587 if (Relevance.NeedsFixIts)
|
|
588 Scores.ExcludingName *= 0.5;
|
|
589 if (Relevance.Forbidden)
|
|
590 Scores.ExcludingName *= 0;
|
|
591 if (Quality.Category == SymbolQualitySignals::Keyword)
|
|
592 Scores.ExcludingName *= 4;
|
|
593
|
|
594 // NameMatch should be a multiplier on total score to support rescoring.
|
|
595 Scores.Total = Relevance.NameMatch * Scores.ExcludingName;
|
|
596 return Scores;
|
|
597 }
|
|
598
|
150
|
599 // Produces an integer that sorts in the same order as F.
|
|
600 // That is: a < b <==> encodeFloat(a) < encodeFloat(b).
|
|
601 static uint32_t encodeFloat(float F) {
|
|
602 static_assert(std::numeric_limits<float>::is_iec559, "");
|
|
603 constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1);
|
|
604
|
|
605 // Get the bits of the float. Endianness is the same as for integers.
|
|
606 uint32_t U = llvm::FloatToBits(F);
|
|
607 // IEEE 754 floats compare like sign-magnitude integers.
|
|
608 if (U & TopBit) // Negative float.
|
|
609 return 0 - U; // Map onto the low half of integers, order reversed.
|
|
610 return U + TopBit; // Positive floats map onto the high half of integers.
|
|
611 }
|
|
612
|
|
613 std::string sortText(float Score, llvm::StringRef Name) {
|
|
614 // We convert -Score to an integer, and hex-encode for readability.
|
|
615 // Example: [0.5, "foo"] -> "41000000foo"
|
|
616 std::string S;
|
|
617 llvm::raw_string_ostream OS(S);
|
|
618 llvm::write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower,
|
|
619 /*Width=*/2 * sizeof(Score));
|
|
620 OS << Name;
|
|
621 OS.flush();
|
|
622 return S;
|
|
623 }
|
|
624
|
|
625 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
|
|
626 const SignatureQualitySignals &S) {
|
|
627 OS << llvm::formatv("=== Signature Quality:\n");
|
|
628 OS << llvm::formatv("\tNumber of parameters: {0}\n", S.NumberOfParameters);
|
|
629 OS << llvm::formatv("\tNumber of optional parameters: {0}\n",
|
|
630 S.NumberOfOptionalParameters);
|
|
631 OS << llvm::formatv("\tKind: {0}\n", S.Kind);
|
|
632 return OS;
|
|
633 }
|
|
634
|
|
635 } // namespace clangd
|
|
636 } // namespace clang
|