comparison clang-tools-extra/clangd/Quality.cpp @ 150:1d019706d866

LLVM10
author anatofuz
date Thu, 13 Feb 2020 15:10:13 +0900
parents
children 0572611fdcc8
comparison
equal deleted inserted replaced
147:c2174574ed3a 150:1d019706d866
1 //===--- Quality.cpp ---------------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "Quality.h"
10 #include "AST.h"
11 #include "FileDistance.h"
12 #include "SourceCode.h"
13 #include "URI.h"
14 #include "index/Symbol.h"
15 #include "clang/AST/ASTContext.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/AST/DeclTemplate.h"
19 #include "clang/AST/DeclVisitor.h"
20 #include "clang/Basic/CharInfo.h"
21 #include "clang/Basic/SourceManager.h"
22 #include "clang/Sema/CodeCompleteConsumer.h"
23 #include "llvm/ADT/ArrayRef.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/StringExtras.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/Support/Casting.h"
29 #include "llvm/Support/FormatVariadic.h"
30 #include "llvm/Support/MathExtras.h"
31 #include "llvm/Support/raw_ostream.h"
32 #include <algorithm>
33 #include <cmath>
34
35 namespace clang {
36 namespace clangd {
37 static bool isReserved(llvm::StringRef Name) {
38 // FIXME: Should we exclude _Bool and others recognized by the standard?
39 return Name.size() >= 2 && Name[0] == '_' &&
40 (isUppercase(Name[1]) || Name[1] == '_');
41 }
42
43 static bool hasDeclInMainFile(const Decl &D) {
44 auto &SourceMgr = D.getASTContext().getSourceManager();
45 for (auto *Redecl : D.redecls()) {
46 if (isInsideMainFile(Redecl->getLocation(), SourceMgr))
47 return true;
48 }
49 return false;
50 }
51
52 static bool hasUsingDeclInMainFile(const CodeCompletionResult &R) {
53 const auto &Context = R.Declaration->getASTContext();
54 const auto &SourceMgr = Context.getSourceManager();
55 if (R.ShadowDecl) {
56 if (isInsideMainFile(R.ShadowDecl->getLocation(), SourceMgr))
57 return true;
58 }
59 return false;
60 }
61
62 static SymbolQualitySignals::SymbolCategory categorize(const NamedDecl &ND) {
63 if (const auto *FD = dyn_cast<FunctionDecl>(&ND)) {
64 if (FD->isOverloadedOperator())
65 return SymbolQualitySignals::Operator;
66 }
67 class Switch
68 : public ConstDeclVisitor<Switch, SymbolQualitySignals::SymbolCategory> {
69 public:
70 #define MAP(DeclType, Category) \
71 SymbolQualitySignals::SymbolCategory Visit##DeclType(const DeclType *) { \
72 return SymbolQualitySignals::Category; \
73 }
74 MAP(NamespaceDecl, Namespace);
75 MAP(NamespaceAliasDecl, Namespace);
76 MAP(TypeDecl, Type);
77 MAP(TypeAliasTemplateDecl, Type);
78 MAP(ClassTemplateDecl, Type);
79 MAP(CXXConstructorDecl, Constructor);
80 MAP(CXXDestructorDecl, Destructor);
81 MAP(ValueDecl, Variable);
82 MAP(VarTemplateDecl, Variable);
83 MAP(FunctionDecl, Function);
84 MAP(FunctionTemplateDecl, Function);
85 MAP(Decl, Unknown);
86 #undef MAP
87 };
88 return Switch().Visit(&ND);
89 }
90
91 static SymbolQualitySignals::SymbolCategory
92 categorize(const CodeCompletionResult &R) {
93 if (R.Declaration)
94 return categorize(*R.Declaration);
95 if (R.Kind == CodeCompletionResult::RK_Macro)
96 return SymbolQualitySignals::Macro;
97 // Everything else is a keyword or a pattern. Patterns are mostly keywords
98 // too, except a few which we recognize by cursor kind.
99 switch (R.CursorKind) {
100 case CXCursor_CXXMethod:
101 return SymbolQualitySignals::Function;
102 case CXCursor_ModuleImportDecl:
103 return SymbolQualitySignals::Namespace;
104 case CXCursor_MacroDefinition:
105 return SymbolQualitySignals::Macro;
106 case CXCursor_TypeRef:
107 return SymbolQualitySignals::Type;
108 case CXCursor_MemberRef:
109 return SymbolQualitySignals::Variable;
110 case CXCursor_Constructor:
111 return SymbolQualitySignals::Constructor;
112 default:
113 return SymbolQualitySignals::Keyword;
114 }
115 }
116
117 static SymbolQualitySignals::SymbolCategory
118 categorize(const index::SymbolInfo &D) {
119 switch (D.Kind) {
120 case index::SymbolKind::Namespace:
121 case index::SymbolKind::NamespaceAlias:
122 return SymbolQualitySignals::Namespace;
123 case index::SymbolKind::Macro:
124 return SymbolQualitySignals::Macro;
125 case index::SymbolKind::Enum:
126 case index::SymbolKind::Struct:
127 case index::SymbolKind::Class:
128 case index::SymbolKind::Protocol:
129 case index::SymbolKind::Extension:
130 case index::SymbolKind::Union:
131 case index::SymbolKind::TypeAlias:
132 return SymbolQualitySignals::Type;
133 case index::SymbolKind::Function:
134 case index::SymbolKind::ClassMethod:
135 case index::SymbolKind::InstanceMethod:
136 case index::SymbolKind::StaticMethod:
137 case index::SymbolKind::InstanceProperty:
138 case index::SymbolKind::ClassProperty:
139 case index::SymbolKind::StaticProperty:
140 case index::SymbolKind::ConversionFunction:
141 return SymbolQualitySignals::Function;
142 case index::SymbolKind::Destructor:
143 return SymbolQualitySignals::Destructor;
144 case index::SymbolKind::Constructor:
145 return SymbolQualitySignals::Constructor;
146 case index::SymbolKind::Variable:
147 case index::SymbolKind::Field:
148 case index::SymbolKind::EnumConstant:
149 case index::SymbolKind::Parameter:
150 return SymbolQualitySignals::Variable;
151 case index::SymbolKind::Using:
152 case index::SymbolKind::Module:
153 case index::SymbolKind::Unknown:
154 return SymbolQualitySignals::Unknown;
155 }
156 llvm_unreachable("Unknown index::SymbolKind");
157 }
158
159 static bool isInstanceMember(const NamedDecl *ND) {
160 if (!ND)
161 return false;
162 if (const auto *TP = dyn_cast<FunctionTemplateDecl>(ND))
163 ND = TP->TemplateDecl::getTemplatedDecl();
164 if (const auto *CM = dyn_cast<CXXMethodDecl>(ND))
165 return !CM->isStatic();
166 return isa<FieldDecl>(ND); // Note that static fields are VarDecl.
167 }
168
169 static bool isInstanceMember(const index::SymbolInfo &D) {
170 switch (D.Kind) {
171 case index::SymbolKind::InstanceMethod:
172 case index::SymbolKind::InstanceProperty:
173 case index::SymbolKind::Field:
174 return true;
175 default:
176 return false;
177 }
178 }
179
180 void SymbolQualitySignals::merge(const CodeCompletionResult &SemaCCResult) {
181 Deprecated |= (SemaCCResult.Availability == CXAvailability_Deprecated);
182 Category = categorize(SemaCCResult);
183
184 if (SemaCCResult.Declaration) {
185 ImplementationDetail |= isImplementationDetail(SemaCCResult.Declaration);
186 if (auto *ID = SemaCCResult.Declaration->getIdentifier())
187 ReservedName = ReservedName || isReserved(ID->getName());
188 } else if (SemaCCResult.Kind == CodeCompletionResult::RK_Macro)
189 ReservedName = ReservedName || isReserved(SemaCCResult.Macro->getName());
190 }
191
192 void SymbolQualitySignals::merge(const Symbol &IndexResult) {
193 Deprecated |= (IndexResult.Flags & Symbol::Deprecated);
194 ImplementationDetail |= (IndexResult.Flags & Symbol::ImplementationDetail);
195 References = std::max(IndexResult.References, References);
196 Category = categorize(IndexResult.SymInfo);
197 ReservedName = ReservedName || isReserved(IndexResult.Name);
198 }
199
200 float SymbolQualitySignals::evaluate() const {
201 float Score = 1;
202
203 // This avoids a sharp gradient for tail symbols, and also neatly avoids the
204 // question of whether 0 references means a bad symbol or missing data.
205 if (References >= 10) {
206 // Use a sigmoid style boosting function, which flats out nicely for large
207 // numbers (e.g. 2.58 for 1M refererences).
208 // The following boosting function is equivalent to:
209 // m = 0.06
210 // f = 12.0
211 // boost = f * sigmoid(m * std::log(References)) - 0.5 * f + 0.59
212 // Sample data points: (10, 1.00), (100, 1.41), (1000, 1.82),
213 // (10K, 2.21), (100K, 2.58), (1M, 2.94)
214 float S = std::pow(References, -0.06);
215 Score *= 6.0 * (1 - S) / (1 + S) + 0.59;
216 }
217
218 if (Deprecated)
219 Score *= 0.1f;
220 if (ReservedName)
221 Score *= 0.1f;
222 if (ImplementationDetail)
223 Score *= 0.2f;
224
225 switch (Category) {
226 case Keyword: // Often relevant, but misses most signals.
227 Score *= 4; // FIXME: important keywords should have specific boosts.
228 break;
229 case Type:
230 case Function:
231 case Variable:
232 Score *= 1.1f;
233 break;
234 case Namespace:
235 Score *= 0.8f;
236 break;
237 case Macro:
238 case Destructor:
239 case Operator:
240 Score *= 0.5f;
241 break;
242 case Constructor: // No boost constructors so they are after class types.
243 case Unknown:
244 break;
245 }
246
247 return Score;
248 }
249
250 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
251 const SymbolQualitySignals &S) {
252 OS << llvm::formatv("=== Symbol quality: {0}\n", S.evaluate());
253 OS << llvm::formatv("\tReferences: {0}\n", S.References);
254 OS << llvm::formatv("\tDeprecated: {0}\n", S.Deprecated);
255 OS << llvm::formatv("\tReserved name: {0}\n", S.ReservedName);
256 OS << llvm::formatv("\tCategory: {0}\n", static_cast<int>(S.Category));
257 return OS;
258 }
259
260 static SymbolRelevanceSignals::AccessibleScope
261 computeScope(const NamedDecl *D) {
262 // Injected "Foo" within the class "Foo" has file scope, not class scope.
263 const DeclContext *DC = D->getDeclContext();
264 if (auto *R = dyn_cast_or_null<RecordDecl>(D))
265 if (R->isInjectedClassName())
266 DC = DC->getParent();
267 // Class constructor should have the same scope as the class.
268 if (isa<CXXConstructorDecl>(D))
269 DC = DC->getParent();
270 bool InClass = false;
271 for (; !DC->isFileContext(); DC = DC->getParent()) {
272 if (DC->isFunctionOrMethod())
273 return SymbolRelevanceSignals::FunctionScope;
274 InClass = InClass || DC->isRecord();
275 }
276 if (InClass)
277 return SymbolRelevanceSignals::ClassScope;
278 // ExternalLinkage threshold could be tweaked, e.g. module-visible as global.
279 // Avoid caching linkage if it may change after enclosing code completion.
280 if (hasUnstableLinkage(D) || D->getLinkageInternal() < ExternalLinkage)
281 return SymbolRelevanceSignals::FileScope;
282 return SymbolRelevanceSignals::GlobalScope;
283 }
284
285 void SymbolRelevanceSignals::merge(const Symbol &IndexResult) {
286 SymbolURI = IndexResult.CanonicalDeclaration.FileURI;
287 SymbolScope = IndexResult.Scope;
288 IsInstanceMember |= isInstanceMember(IndexResult.SymInfo);
289 if (!(IndexResult.Flags & Symbol::VisibleOutsideFile)) {
290 Scope = AccessibleScope::FileScope;
291 }
292 }
293
294 void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) {
295 if (SemaCCResult.Availability == CXAvailability_NotAvailable ||
296 SemaCCResult.Availability == CXAvailability_NotAccessible)
297 Forbidden = true;
298
299 if (SemaCCResult.Declaration) {
300 SemaSaysInScope = true;
301 // We boost things that have decls in the main file. We give a fixed score
302 // for all other declarations in sema as they are already included in the
303 // translation unit.
304 float DeclProximity = (hasDeclInMainFile(*SemaCCResult.Declaration) ||
305 hasUsingDeclInMainFile(SemaCCResult))
306 ? 1.0
307 : 0.6;
308 SemaFileProximityScore = std::max(DeclProximity, SemaFileProximityScore);
309 IsInstanceMember |= isInstanceMember(SemaCCResult.Declaration);
310 InBaseClass |= SemaCCResult.InBaseClass;
311 }
312
313 // Declarations are scoped, others (like macros) are assumed global.
314 if (SemaCCResult.Declaration)
315 Scope = std::min(Scope, computeScope(SemaCCResult.Declaration));
316
317 NeedsFixIts = !SemaCCResult.FixIts.empty();
318 }
319
320 static std::pair<float, unsigned> uriProximity(llvm::StringRef SymbolURI,
321 URIDistance *D) {
322 if (!D || SymbolURI.empty())
323 return {0.f, 0u};
324 unsigned Distance = D->distance(SymbolURI);
325 // Assume approximately default options are used for sensible scoring.
326 return {std::exp(Distance * -0.4f / FileDistanceOptions().UpCost), Distance};
327 }
328
329 static float scopeBoost(ScopeDistance &Distance,
330 llvm::Optional<llvm::StringRef> SymbolScope) {
331 if (!SymbolScope)
332 return 1;
333 auto D = Distance.distance(*SymbolScope);
334 if (D == FileDistance::Unreachable)
335 return 0.6f;
336 return std::max(0.65, 2.0 * std::pow(0.6, D / 2.0));
337 }
338
339 static llvm::Optional<llvm::StringRef>
340 wordMatching(llvm::StringRef Name, const llvm::StringSet<> *ContextWords) {
341 if (ContextWords)
342 for (const auto& Word : ContextWords->keys())
343 if (Name.contains_lower(Word))
344 return Word;
345 return llvm::None;
346 }
347
348 float SymbolRelevanceSignals::evaluate() const {
349 float Score = 1;
350
351 if (Forbidden)
352 return 0;
353
354 Score *= NameMatch;
355
356 // File proximity scores are [0,1] and we translate them into a multiplier in
357 // the range from 1 to 3.
358 Score *= 1 + 2 * std::max(uriProximity(SymbolURI, FileProximityMatch).first,
359 SemaFileProximityScore);
360
361 if (ScopeProximityMatch)
362 // Use a constant scope boost for sema results, as scopes of sema results
363 // can be tricky (e.g. class/function scope). Set to the max boost as we
364 // don't load top-level symbols from the preamble and sema results are
365 // always in the accessible scope.
366 Score *=
367 SemaSaysInScope ? 2.0 : scopeBoost(*ScopeProximityMatch, SymbolScope);
368
369 if (wordMatching(Name, ContextWords))
370 Score *= 1.5;
371
372 // Symbols like local variables may only be referenced within their scope.
373 // Conversely if we're in that scope, it's likely we'll reference them.
374 if (Query == CodeComplete) {
375 // The narrower the scope where a symbol is visible, the more likely it is
376 // to be relevant when it is available.
377 switch (Scope) {
378 case GlobalScope:
379 break;
380 case FileScope:
381 Score *= 1.5f;
382 break;
383 case ClassScope:
384 Score *= 2;
385 break;
386 case FunctionScope:
387 Score *= 4;
388 break;
389 }
390 } else {
391 // For non-completion queries, the wider the scope where a symbol is
392 // visible, the more likely it is to be relevant.
393 switch (Scope) {
394 case GlobalScope:
395 break;
396 case FileScope:
397 Score *= 0.5f;
398 break;
399 default:
400 // TODO: Handle other scopes as we start to use them for index results.
401 break;
402 }
403 }
404
405 if (TypeMatchesPreferred)
406 Score *= 5.0;
407
408 // Penalize non-instance members when they are accessed via a class instance.
409 if (!IsInstanceMember &&
410 (Context == CodeCompletionContext::CCC_DotMemberAccess ||
411 Context == CodeCompletionContext::CCC_ArrowMemberAccess)) {
412 Score *= 0.2f;
413 }
414
415 if (InBaseClass)
416 Score *= 0.5f;
417
418 // Penalize for FixIts.
419 if (NeedsFixIts)
420 Score *= 0.5f;
421
422 return Score;
423 }
424
425 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
426 const SymbolRelevanceSignals &S) {
427 OS << llvm::formatv("=== Symbol relevance: {0}\n", S.evaluate());
428 OS << llvm::formatv("\tName: {0}\n", S.Name);
429 OS << llvm::formatv("\tName match: {0}\n", S.NameMatch);
430 if (S.ContextWords)
431 OS << llvm::formatv(
432 "\tMatching context word: {0}\n",
433 wordMatching(S.Name, S.ContextWords).getValueOr("<none>"));
434 OS << llvm::formatv("\tForbidden: {0}\n", S.Forbidden);
435 OS << llvm::formatv("\tNeedsFixIts: {0}\n", S.NeedsFixIts);
436 OS << llvm::formatv("\tIsInstanceMember: {0}\n", S.IsInstanceMember);
437 OS << llvm::formatv("\tContext: {0}\n", getCompletionKindString(S.Context));
438 OS << llvm::formatv("\tQuery type: {0}\n", static_cast<int>(S.Query));
439 OS << llvm::formatv("\tScope: {0}\n", static_cast<int>(S.Scope));
440
441 OS << llvm::formatv("\tSymbol URI: {0}\n", S.SymbolURI);
442 OS << llvm::formatv("\tSymbol scope: {0}\n",
443 S.SymbolScope ? *S.SymbolScope : "<None>");
444
445 if (S.FileProximityMatch) {
446 auto Score = uriProximity(S.SymbolURI, S.FileProximityMatch);
447 OS << llvm::formatv("\tIndex URI proximity: {0} (distance={1})\n",
448 Score.first, Score.second);
449 }
450 OS << llvm::formatv("\tSema file proximity: {0}\n", S.SemaFileProximityScore);
451
452 OS << llvm::formatv("\tSema says in scope: {0}\n", S.SemaSaysInScope);
453 if (S.ScopeProximityMatch)
454 OS << llvm::formatv("\tIndex scope boost: {0}\n",
455 scopeBoost(*S.ScopeProximityMatch, S.SymbolScope));
456
457 OS << llvm::formatv(
458 "\tType matched preferred: {0} (Context type: {1}, Symbol type: {2}\n",
459 S.TypeMatchesPreferred, S.HadContextType, S.HadSymbolType);
460
461 return OS;
462 }
463
464 float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance) {
465 return SymbolQuality * SymbolRelevance;
466 }
467
468 // Produces an integer that sorts in the same order as F.
469 // That is: a < b <==> encodeFloat(a) < encodeFloat(b).
470 static uint32_t encodeFloat(float F) {
471 static_assert(std::numeric_limits<float>::is_iec559, "");
472 constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1);
473
474 // Get the bits of the float. Endianness is the same as for integers.
475 uint32_t U = llvm::FloatToBits(F);
476 // IEEE 754 floats compare like sign-magnitude integers.
477 if (U & TopBit) // Negative float.
478 return 0 - U; // Map onto the low half of integers, order reversed.
479 return U + TopBit; // Positive floats map onto the high half of integers.
480 }
481
482 std::string sortText(float Score, llvm::StringRef Name) {
483 // We convert -Score to an integer, and hex-encode for readability.
484 // Example: [0.5, "foo"] -> "41000000foo"
485 std::string S;
486 llvm::raw_string_ostream OS(S);
487 llvm::write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower,
488 /*Width=*/2 * sizeof(Score));
489 OS << Name;
490 OS.flush();
491 return S;
492 }
493
494 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
495 const SignatureQualitySignals &S) {
496 OS << llvm::formatv("=== Signature Quality:\n");
497 OS << llvm::formatv("\tNumber of parameters: {0}\n", S.NumberOfParameters);
498 OS << llvm::formatv("\tNumber of optional parameters: {0}\n",
499 S.NumberOfOptionalParameters);
500 OS << llvm::formatv("\tKind: {0}\n", S.Kind);
501 return OS;
502 }
503
504 } // namespace clangd
505 } // namespace clang