Mercurial > hg > CbC > CbC_llvm
comparison clang-tools-extra/clangd/Quality.cpp @ 150:1d019706d866
LLVM10
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 15:10:13 +0900 |
parents | |
children | 0572611fdcc8 |
comparison
equal
deleted
inserted
replaced
147:c2174574ed3a | 150:1d019706d866 |
---|---|
1 //===--- Quality.cpp ---------------------------------------------*- C++-*-===// | |
2 // | |
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |
4 // See https://llvm.org/LICENSE.txt for license information. | |
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |
6 // | |
7 //===----------------------------------------------------------------------===// | |
8 | |
9 #include "Quality.h" | |
10 #include "AST.h" | |
11 #include "FileDistance.h" | |
12 #include "SourceCode.h" | |
13 #include "URI.h" | |
14 #include "index/Symbol.h" | |
15 #include "clang/AST/ASTContext.h" | |
16 #include "clang/AST/Decl.h" | |
17 #include "clang/AST/DeclCXX.h" | |
18 #include "clang/AST/DeclTemplate.h" | |
19 #include "clang/AST/DeclVisitor.h" | |
20 #include "clang/Basic/CharInfo.h" | |
21 #include "clang/Basic/SourceManager.h" | |
22 #include "clang/Sema/CodeCompleteConsumer.h" | |
23 #include "llvm/ADT/ArrayRef.h" | |
24 #include "llvm/ADT/SmallString.h" | |
25 #include "llvm/ADT/SmallVector.h" | |
26 #include "llvm/ADT/StringExtras.h" | |
27 #include "llvm/ADT/StringRef.h" | |
28 #include "llvm/Support/Casting.h" | |
29 #include "llvm/Support/FormatVariadic.h" | |
30 #include "llvm/Support/MathExtras.h" | |
31 #include "llvm/Support/raw_ostream.h" | |
32 #include <algorithm> | |
33 #include <cmath> | |
34 | |
35 namespace clang { | |
36 namespace clangd { | |
37 static bool isReserved(llvm::StringRef Name) { | |
38 // FIXME: Should we exclude _Bool and others recognized by the standard? | |
39 return Name.size() >= 2 && Name[0] == '_' && | |
40 (isUppercase(Name[1]) || Name[1] == '_'); | |
41 } | |
42 | |
43 static bool hasDeclInMainFile(const Decl &D) { | |
44 auto &SourceMgr = D.getASTContext().getSourceManager(); | |
45 for (auto *Redecl : D.redecls()) { | |
46 if (isInsideMainFile(Redecl->getLocation(), SourceMgr)) | |
47 return true; | |
48 } | |
49 return false; | |
50 } | |
51 | |
52 static bool hasUsingDeclInMainFile(const CodeCompletionResult &R) { | |
53 const auto &Context = R.Declaration->getASTContext(); | |
54 const auto &SourceMgr = Context.getSourceManager(); | |
55 if (R.ShadowDecl) { | |
56 if (isInsideMainFile(R.ShadowDecl->getLocation(), SourceMgr)) | |
57 return true; | |
58 } | |
59 return false; | |
60 } | |
61 | |
62 static SymbolQualitySignals::SymbolCategory categorize(const NamedDecl &ND) { | |
63 if (const auto *FD = dyn_cast<FunctionDecl>(&ND)) { | |
64 if (FD->isOverloadedOperator()) | |
65 return SymbolQualitySignals::Operator; | |
66 } | |
67 class Switch | |
68 : public ConstDeclVisitor<Switch, SymbolQualitySignals::SymbolCategory> { | |
69 public: | |
70 #define MAP(DeclType, Category) \ | |
71 SymbolQualitySignals::SymbolCategory Visit##DeclType(const DeclType *) { \ | |
72 return SymbolQualitySignals::Category; \ | |
73 } | |
74 MAP(NamespaceDecl, Namespace); | |
75 MAP(NamespaceAliasDecl, Namespace); | |
76 MAP(TypeDecl, Type); | |
77 MAP(TypeAliasTemplateDecl, Type); | |
78 MAP(ClassTemplateDecl, Type); | |
79 MAP(CXXConstructorDecl, Constructor); | |
80 MAP(CXXDestructorDecl, Destructor); | |
81 MAP(ValueDecl, Variable); | |
82 MAP(VarTemplateDecl, Variable); | |
83 MAP(FunctionDecl, Function); | |
84 MAP(FunctionTemplateDecl, Function); | |
85 MAP(Decl, Unknown); | |
86 #undef MAP | |
87 }; | |
88 return Switch().Visit(&ND); | |
89 } | |
90 | |
91 static SymbolQualitySignals::SymbolCategory | |
92 categorize(const CodeCompletionResult &R) { | |
93 if (R.Declaration) | |
94 return categorize(*R.Declaration); | |
95 if (R.Kind == CodeCompletionResult::RK_Macro) | |
96 return SymbolQualitySignals::Macro; | |
97 // Everything else is a keyword or a pattern. Patterns are mostly keywords | |
98 // too, except a few which we recognize by cursor kind. | |
99 switch (R.CursorKind) { | |
100 case CXCursor_CXXMethod: | |
101 return SymbolQualitySignals::Function; | |
102 case CXCursor_ModuleImportDecl: | |
103 return SymbolQualitySignals::Namespace; | |
104 case CXCursor_MacroDefinition: | |
105 return SymbolQualitySignals::Macro; | |
106 case CXCursor_TypeRef: | |
107 return SymbolQualitySignals::Type; | |
108 case CXCursor_MemberRef: | |
109 return SymbolQualitySignals::Variable; | |
110 case CXCursor_Constructor: | |
111 return SymbolQualitySignals::Constructor; | |
112 default: | |
113 return SymbolQualitySignals::Keyword; | |
114 } | |
115 } | |
116 | |
117 static SymbolQualitySignals::SymbolCategory | |
118 categorize(const index::SymbolInfo &D) { | |
119 switch (D.Kind) { | |
120 case index::SymbolKind::Namespace: | |
121 case index::SymbolKind::NamespaceAlias: | |
122 return SymbolQualitySignals::Namespace; | |
123 case index::SymbolKind::Macro: | |
124 return SymbolQualitySignals::Macro; | |
125 case index::SymbolKind::Enum: | |
126 case index::SymbolKind::Struct: | |
127 case index::SymbolKind::Class: | |
128 case index::SymbolKind::Protocol: | |
129 case index::SymbolKind::Extension: | |
130 case index::SymbolKind::Union: | |
131 case index::SymbolKind::TypeAlias: | |
132 return SymbolQualitySignals::Type; | |
133 case index::SymbolKind::Function: | |
134 case index::SymbolKind::ClassMethod: | |
135 case index::SymbolKind::InstanceMethod: | |
136 case index::SymbolKind::StaticMethod: | |
137 case index::SymbolKind::InstanceProperty: | |
138 case index::SymbolKind::ClassProperty: | |
139 case index::SymbolKind::StaticProperty: | |
140 case index::SymbolKind::ConversionFunction: | |
141 return SymbolQualitySignals::Function; | |
142 case index::SymbolKind::Destructor: | |
143 return SymbolQualitySignals::Destructor; | |
144 case index::SymbolKind::Constructor: | |
145 return SymbolQualitySignals::Constructor; | |
146 case index::SymbolKind::Variable: | |
147 case index::SymbolKind::Field: | |
148 case index::SymbolKind::EnumConstant: | |
149 case index::SymbolKind::Parameter: | |
150 return SymbolQualitySignals::Variable; | |
151 case index::SymbolKind::Using: | |
152 case index::SymbolKind::Module: | |
153 case index::SymbolKind::Unknown: | |
154 return SymbolQualitySignals::Unknown; | |
155 } | |
156 llvm_unreachable("Unknown index::SymbolKind"); | |
157 } | |
158 | |
159 static bool isInstanceMember(const NamedDecl *ND) { | |
160 if (!ND) | |
161 return false; | |
162 if (const auto *TP = dyn_cast<FunctionTemplateDecl>(ND)) | |
163 ND = TP->TemplateDecl::getTemplatedDecl(); | |
164 if (const auto *CM = dyn_cast<CXXMethodDecl>(ND)) | |
165 return !CM->isStatic(); | |
166 return isa<FieldDecl>(ND); // Note that static fields are VarDecl. | |
167 } | |
168 | |
169 static bool isInstanceMember(const index::SymbolInfo &D) { | |
170 switch (D.Kind) { | |
171 case index::SymbolKind::InstanceMethod: | |
172 case index::SymbolKind::InstanceProperty: | |
173 case index::SymbolKind::Field: | |
174 return true; | |
175 default: | |
176 return false; | |
177 } | |
178 } | |
179 | |
180 void SymbolQualitySignals::merge(const CodeCompletionResult &SemaCCResult) { | |
181 Deprecated |= (SemaCCResult.Availability == CXAvailability_Deprecated); | |
182 Category = categorize(SemaCCResult); | |
183 | |
184 if (SemaCCResult.Declaration) { | |
185 ImplementationDetail |= isImplementationDetail(SemaCCResult.Declaration); | |
186 if (auto *ID = SemaCCResult.Declaration->getIdentifier()) | |
187 ReservedName = ReservedName || isReserved(ID->getName()); | |
188 } else if (SemaCCResult.Kind == CodeCompletionResult::RK_Macro) | |
189 ReservedName = ReservedName || isReserved(SemaCCResult.Macro->getName()); | |
190 } | |
191 | |
192 void SymbolQualitySignals::merge(const Symbol &IndexResult) { | |
193 Deprecated |= (IndexResult.Flags & Symbol::Deprecated); | |
194 ImplementationDetail |= (IndexResult.Flags & Symbol::ImplementationDetail); | |
195 References = std::max(IndexResult.References, References); | |
196 Category = categorize(IndexResult.SymInfo); | |
197 ReservedName = ReservedName || isReserved(IndexResult.Name); | |
198 } | |
199 | |
200 float SymbolQualitySignals::evaluate() const { | |
201 float Score = 1; | |
202 | |
203 // This avoids a sharp gradient for tail symbols, and also neatly avoids the | |
204 // question of whether 0 references means a bad symbol or missing data. | |
205 if (References >= 10) { | |
206 // Use a sigmoid style boosting function, which flats out nicely for large | |
207 // numbers (e.g. 2.58 for 1M refererences). | |
208 // The following boosting function is equivalent to: | |
209 // m = 0.06 | |
210 // f = 12.0 | |
211 // boost = f * sigmoid(m * std::log(References)) - 0.5 * f + 0.59 | |
212 // Sample data points: (10, 1.00), (100, 1.41), (1000, 1.82), | |
213 // (10K, 2.21), (100K, 2.58), (1M, 2.94) | |
214 float S = std::pow(References, -0.06); | |
215 Score *= 6.0 * (1 - S) / (1 + S) + 0.59; | |
216 } | |
217 | |
218 if (Deprecated) | |
219 Score *= 0.1f; | |
220 if (ReservedName) | |
221 Score *= 0.1f; | |
222 if (ImplementationDetail) | |
223 Score *= 0.2f; | |
224 | |
225 switch (Category) { | |
226 case Keyword: // Often relevant, but misses most signals. | |
227 Score *= 4; // FIXME: important keywords should have specific boosts. | |
228 break; | |
229 case Type: | |
230 case Function: | |
231 case Variable: | |
232 Score *= 1.1f; | |
233 break; | |
234 case Namespace: | |
235 Score *= 0.8f; | |
236 break; | |
237 case Macro: | |
238 case Destructor: | |
239 case Operator: | |
240 Score *= 0.5f; | |
241 break; | |
242 case Constructor: // No boost constructors so they are after class types. | |
243 case Unknown: | |
244 break; | |
245 } | |
246 | |
247 return Score; | |
248 } | |
249 | |
250 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, | |
251 const SymbolQualitySignals &S) { | |
252 OS << llvm::formatv("=== Symbol quality: {0}\n", S.evaluate()); | |
253 OS << llvm::formatv("\tReferences: {0}\n", S.References); | |
254 OS << llvm::formatv("\tDeprecated: {0}\n", S.Deprecated); | |
255 OS << llvm::formatv("\tReserved name: {0}\n", S.ReservedName); | |
256 OS << llvm::formatv("\tCategory: {0}\n", static_cast<int>(S.Category)); | |
257 return OS; | |
258 } | |
259 | |
260 static SymbolRelevanceSignals::AccessibleScope | |
261 computeScope(const NamedDecl *D) { | |
262 // Injected "Foo" within the class "Foo" has file scope, not class scope. | |
263 const DeclContext *DC = D->getDeclContext(); | |
264 if (auto *R = dyn_cast_or_null<RecordDecl>(D)) | |
265 if (R->isInjectedClassName()) | |
266 DC = DC->getParent(); | |
267 // Class constructor should have the same scope as the class. | |
268 if (isa<CXXConstructorDecl>(D)) | |
269 DC = DC->getParent(); | |
270 bool InClass = false; | |
271 for (; !DC->isFileContext(); DC = DC->getParent()) { | |
272 if (DC->isFunctionOrMethod()) | |
273 return SymbolRelevanceSignals::FunctionScope; | |
274 InClass = InClass || DC->isRecord(); | |
275 } | |
276 if (InClass) | |
277 return SymbolRelevanceSignals::ClassScope; | |
278 // ExternalLinkage threshold could be tweaked, e.g. module-visible as global. | |
279 // Avoid caching linkage if it may change after enclosing code completion. | |
280 if (hasUnstableLinkage(D) || D->getLinkageInternal() < ExternalLinkage) | |
281 return SymbolRelevanceSignals::FileScope; | |
282 return SymbolRelevanceSignals::GlobalScope; | |
283 } | |
284 | |
285 void SymbolRelevanceSignals::merge(const Symbol &IndexResult) { | |
286 SymbolURI = IndexResult.CanonicalDeclaration.FileURI; | |
287 SymbolScope = IndexResult.Scope; | |
288 IsInstanceMember |= isInstanceMember(IndexResult.SymInfo); | |
289 if (!(IndexResult.Flags & Symbol::VisibleOutsideFile)) { | |
290 Scope = AccessibleScope::FileScope; | |
291 } | |
292 } | |
293 | |
294 void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) { | |
295 if (SemaCCResult.Availability == CXAvailability_NotAvailable || | |
296 SemaCCResult.Availability == CXAvailability_NotAccessible) | |
297 Forbidden = true; | |
298 | |
299 if (SemaCCResult.Declaration) { | |
300 SemaSaysInScope = true; | |
301 // We boost things that have decls in the main file. We give a fixed score | |
302 // for all other declarations in sema as they are already included in the | |
303 // translation unit. | |
304 float DeclProximity = (hasDeclInMainFile(*SemaCCResult.Declaration) || | |
305 hasUsingDeclInMainFile(SemaCCResult)) | |
306 ? 1.0 | |
307 : 0.6; | |
308 SemaFileProximityScore = std::max(DeclProximity, SemaFileProximityScore); | |
309 IsInstanceMember |= isInstanceMember(SemaCCResult.Declaration); | |
310 InBaseClass |= SemaCCResult.InBaseClass; | |
311 } | |
312 | |
313 // Declarations are scoped, others (like macros) are assumed global. | |
314 if (SemaCCResult.Declaration) | |
315 Scope = std::min(Scope, computeScope(SemaCCResult.Declaration)); | |
316 | |
317 NeedsFixIts = !SemaCCResult.FixIts.empty(); | |
318 } | |
319 | |
320 static std::pair<float, unsigned> uriProximity(llvm::StringRef SymbolURI, | |
321 URIDistance *D) { | |
322 if (!D || SymbolURI.empty()) | |
323 return {0.f, 0u}; | |
324 unsigned Distance = D->distance(SymbolURI); | |
325 // Assume approximately default options are used for sensible scoring. | |
326 return {std::exp(Distance * -0.4f / FileDistanceOptions().UpCost), Distance}; | |
327 } | |
328 | |
329 static float scopeBoost(ScopeDistance &Distance, | |
330 llvm::Optional<llvm::StringRef> SymbolScope) { | |
331 if (!SymbolScope) | |
332 return 1; | |
333 auto D = Distance.distance(*SymbolScope); | |
334 if (D == FileDistance::Unreachable) | |
335 return 0.6f; | |
336 return std::max(0.65, 2.0 * std::pow(0.6, D / 2.0)); | |
337 } | |
338 | |
339 static llvm::Optional<llvm::StringRef> | |
340 wordMatching(llvm::StringRef Name, const llvm::StringSet<> *ContextWords) { | |
341 if (ContextWords) | |
342 for (const auto& Word : ContextWords->keys()) | |
343 if (Name.contains_lower(Word)) | |
344 return Word; | |
345 return llvm::None; | |
346 } | |
347 | |
348 float SymbolRelevanceSignals::evaluate() const { | |
349 float Score = 1; | |
350 | |
351 if (Forbidden) | |
352 return 0; | |
353 | |
354 Score *= NameMatch; | |
355 | |
356 // File proximity scores are [0,1] and we translate them into a multiplier in | |
357 // the range from 1 to 3. | |
358 Score *= 1 + 2 * std::max(uriProximity(SymbolURI, FileProximityMatch).first, | |
359 SemaFileProximityScore); | |
360 | |
361 if (ScopeProximityMatch) | |
362 // Use a constant scope boost for sema results, as scopes of sema results | |
363 // can be tricky (e.g. class/function scope). Set to the max boost as we | |
364 // don't load top-level symbols from the preamble and sema results are | |
365 // always in the accessible scope. | |
366 Score *= | |
367 SemaSaysInScope ? 2.0 : scopeBoost(*ScopeProximityMatch, SymbolScope); | |
368 | |
369 if (wordMatching(Name, ContextWords)) | |
370 Score *= 1.5; | |
371 | |
372 // Symbols like local variables may only be referenced within their scope. | |
373 // Conversely if we're in that scope, it's likely we'll reference them. | |
374 if (Query == CodeComplete) { | |
375 // The narrower the scope where a symbol is visible, the more likely it is | |
376 // to be relevant when it is available. | |
377 switch (Scope) { | |
378 case GlobalScope: | |
379 break; | |
380 case FileScope: | |
381 Score *= 1.5f; | |
382 break; | |
383 case ClassScope: | |
384 Score *= 2; | |
385 break; | |
386 case FunctionScope: | |
387 Score *= 4; | |
388 break; | |
389 } | |
390 } else { | |
391 // For non-completion queries, the wider the scope where a symbol is | |
392 // visible, the more likely it is to be relevant. | |
393 switch (Scope) { | |
394 case GlobalScope: | |
395 break; | |
396 case FileScope: | |
397 Score *= 0.5f; | |
398 break; | |
399 default: | |
400 // TODO: Handle other scopes as we start to use them for index results. | |
401 break; | |
402 } | |
403 } | |
404 | |
405 if (TypeMatchesPreferred) | |
406 Score *= 5.0; | |
407 | |
408 // Penalize non-instance members when they are accessed via a class instance. | |
409 if (!IsInstanceMember && | |
410 (Context == CodeCompletionContext::CCC_DotMemberAccess || | |
411 Context == CodeCompletionContext::CCC_ArrowMemberAccess)) { | |
412 Score *= 0.2f; | |
413 } | |
414 | |
415 if (InBaseClass) | |
416 Score *= 0.5f; | |
417 | |
418 // Penalize for FixIts. | |
419 if (NeedsFixIts) | |
420 Score *= 0.5f; | |
421 | |
422 return Score; | |
423 } | |
424 | |
425 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, | |
426 const SymbolRelevanceSignals &S) { | |
427 OS << llvm::formatv("=== Symbol relevance: {0}\n", S.evaluate()); | |
428 OS << llvm::formatv("\tName: {0}\n", S.Name); | |
429 OS << llvm::formatv("\tName match: {0}\n", S.NameMatch); | |
430 if (S.ContextWords) | |
431 OS << llvm::formatv( | |
432 "\tMatching context word: {0}\n", | |
433 wordMatching(S.Name, S.ContextWords).getValueOr("<none>")); | |
434 OS << llvm::formatv("\tForbidden: {0}\n", S.Forbidden); | |
435 OS << llvm::formatv("\tNeedsFixIts: {0}\n", S.NeedsFixIts); | |
436 OS << llvm::formatv("\tIsInstanceMember: {0}\n", S.IsInstanceMember); | |
437 OS << llvm::formatv("\tContext: {0}\n", getCompletionKindString(S.Context)); | |
438 OS << llvm::formatv("\tQuery type: {0}\n", static_cast<int>(S.Query)); | |
439 OS << llvm::formatv("\tScope: {0}\n", static_cast<int>(S.Scope)); | |
440 | |
441 OS << llvm::formatv("\tSymbol URI: {0}\n", S.SymbolURI); | |
442 OS << llvm::formatv("\tSymbol scope: {0}\n", | |
443 S.SymbolScope ? *S.SymbolScope : "<None>"); | |
444 | |
445 if (S.FileProximityMatch) { | |
446 auto Score = uriProximity(S.SymbolURI, S.FileProximityMatch); | |
447 OS << llvm::formatv("\tIndex URI proximity: {0} (distance={1})\n", | |
448 Score.first, Score.second); | |
449 } | |
450 OS << llvm::formatv("\tSema file proximity: {0}\n", S.SemaFileProximityScore); | |
451 | |
452 OS << llvm::formatv("\tSema says in scope: {0}\n", S.SemaSaysInScope); | |
453 if (S.ScopeProximityMatch) | |
454 OS << llvm::formatv("\tIndex scope boost: {0}\n", | |
455 scopeBoost(*S.ScopeProximityMatch, S.SymbolScope)); | |
456 | |
457 OS << llvm::formatv( | |
458 "\tType matched preferred: {0} (Context type: {1}, Symbol type: {2}\n", | |
459 S.TypeMatchesPreferred, S.HadContextType, S.HadSymbolType); | |
460 | |
461 return OS; | |
462 } | |
463 | |
464 float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance) { | |
465 return SymbolQuality * SymbolRelevance; | |
466 } | |
467 | |
468 // Produces an integer that sorts in the same order as F. | |
469 // That is: a < b <==> encodeFloat(a) < encodeFloat(b). | |
470 static uint32_t encodeFloat(float F) { | |
471 static_assert(std::numeric_limits<float>::is_iec559, ""); | |
472 constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1); | |
473 | |
474 // Get the bits of the float. Endianness is the same as for integers. | |
475 uint32_t U = llvm::FloatToBits(F); | |
476 // IEEE 754 floats compare like sign-magnitude integers. | |
477 if (U & TopBit) // Negative float. | |
478 return 0 - U; // Map onto the low half of integers, order reversed. | |
479 return U + TopBit; // Positive floats map onto the high half of integers. | |
480 } | |
481 | |
482 std::string sortText(float Score, llvm::StringRef Name) { | |
483 // We convert -Score to an integer, and hex-encode for readability. | |
484 // Example: [0.5, "foo"] -> "41000000foo" | |
485 std::string S; | |
486 llvm::raw_string_ostream OS(S); | |
487 llvm::write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower, | |
488 /*Width=*/2 * sizeof(Score)); | |
489 OS << Name; | |
490 OS.flush(); | |
491 return S; | |
492 } | |
493 | |
494 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, | |
495 const SignatureQualitySignals &S) { | |
496 OS << llvm::formatv("=== Signature Quality:\n"); | |
497 OS << llvm::formatv("\tNumber of parameters: {0}\n", S.NumberOfParameters); | |
498 OS << llvm::formatv("\tNumber of optional parameters: {0}\n", | |
499 S.NumberOfOptionalParameters); | |
500 OS << llvm::formatv("\tKind: {0}\n", S.Kind); | |
501 return OS; | |
502 } | |
503 | |
504 } // namespace clangd | |
505 } // namespace clang |