annotate clang-tools-extra/clangd/FileDistance.cpp @ 221:79ff65ed7e25

LLVM12 Original
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Tue, 15 Jun 2021 19:15:29 +0900
parents 0572611fdcc8
children c4bab56944e8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
150
anatofuz
parents:
diff changeset
1 //===--- FileDistance.cpp - File contents container -------------*- C++ -*-===//
anatofuz
parents:
diff changeset
2 //
anatofuz
parents:
diff changeset
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
anatofuz
parents:
diff changeset
4 // See https://llvm.org/LICENSE.txt for license information.
anatofuz
parents:
diff changeset
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
anatofuz
parents:
diff changeset
6 //
anatofuz
parents:
diff changeset
7 //===----------------------------------------------------------------------===//
anatofuz
parents:
diff changeset
8 //
anatofuz
parents:
diff changeset
9 // The FileDistance structure allows calculating the minimum distance to paths
anatofuz
parents:
diff changeset
10 // in a single tree.
anatofuz
parents:
diff changeset
11 // We simply walk up the path's ancestors until we find a node whose cost is
anatofuz
parents:
diff changeset
12 // known, and add the cost of walking back down. Initialization ensures this
anatofuz
parents:
diff changeset
13 // gives the correct path to the roots.
anatofuz
parents:
diff changeset
14 // We cache the results, so that the runtime is O(|A|), where A is the set of
anatofuz
parents:
diff changeset
15 // all distinct ancestors of visited paths.
anatofuz
parents:
diff changeset
16 //
anatofuz
parents:
diff changeset
17 // Example after initialization with /=2, /bar=0, DownCost = 1:
anatofuz
parents:
diff changeset
18 // / = 2
anatofuz
parents:
diff changeset
19 // /bar = 0
anatofuz
parents:
diff changeset
20 //
anatofuz
parents:
diff changeset
21 // After querying /foo/bar and /bar/foo:
anatofuz
parents:
diff changeset
22 // / = 2
anatofuz
parents:
diff changeset
23 // /bar = 0
anatofuz
parents:
diff changeset
24 // /bar/foo = 1
anatofuz
parents:
diff changeset
25 // /foo = 3
anatofuz
parents:
diff changeset
26 // /foo/bar = 4
anatofuz
parents:
diff changeset
27 //
anatofuz
parents:
diff changeset
28 // URIDistance creates FileDistance lazily for each URI scheme encountered. In
anatofuz
parents:
diff changeset
29 // practice this is a small constant factor.
anatofuz
parents:
diff changeset
30 //
anatofuz
parents:
diff changeset
31 //===-------------------------------------------------------------------------//
anatofuz
parents:
diff changeset
32
anatofuz
parents:
diff changeset
33 #include "FileDistance.h"
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
34 #include "support/Logger.h"
150
anatofuz
parents:
diff changeset
35 #include "llvm/ADT/STLExtras.h"
anatofuz
parents:
diff changeset
36 #include <queue>
anatofuz
parents:
diff changeset
37
anatofuz
parents:
diff changeset
38 namespace clang {
anatofuz
parents:
diff changeset
39 namespace clangd {
anatofuz
parents:
diff changeset
40
anatofuz
parents:
diff changeset
41 // Convert a path into the canonical form.
anatofuz
parents:
diff changeset
42 // Canonical form is either "/", or "/segment" * N:
anatofuz
parents:
diff changeset
43 // C:\foo\bar --> /c:/foo/bar
anatofuz
parents:
diff changeset
44 // /foo/ --> /foo
anatofuz
parents:
diff changeset
45 // a/b/c --> /a/b/c
anatofuz
parents:
diff changeset
46 static llvm::SmallString<128> canonicalize(llvm::StringRef Path) {
anatofuz
parents:
diff changeset
47 llvm::SmallString<128> Result = Path.rtrim('/');
anatofuz
parents:
diff changeset
48 native(Result, llvm::sys::path::Style::posix);
anatofuz
parents:
diff changeset
49 if (Result.empty() || Result.front() != '/')
anatofuz
parents:
diff changeset
50 Result.insert(Result.begin(), '/');
anatofuz
parents:
diff changeset
51 return Result;
anatofuz
parents:
diff changeset
52 }
anatofuz
parents:
diff changeset
53
anatofuz
parents:
diff changeset
54 constexpr const unsigned FileDistance::Unreachable;
anatofuz
parents:
diff changeset
55 const llvm::hash_code FileDistance::RootHash =
anatofuz
parents:
diff changeset
56 llvm::hash_value(llvm::StringRef("/"));
anatofuz
parents:
diff changeset
57
anatofuz
parents:
diff changeset
58 FileDistance::FileDistance(llvm::StringMap<SourceParams> Sources,
anatofuz
parents:
diff changeset
59 const FileDistanceOptions &Opts)
anatofuz
parents:
diff changeset
60 : Opts(Opts) {
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
61 llvm::DenseMap<llvm::hash_code, llvm::SmallVector<llvm::hash_code>> DownEdges;
150
anatofuz
parents:
diff changeset
62 // Compute the best distance following only up edges.
anatofuz
parents:
diff changeset
63 // Keep track of down edges, in case we can use them to improve on this.
anatofuz
parents:
diff changeset
64 for (const auto &S : Sources) {
anatofuz
parents:
diff changeset
65 auto Canonical = canonicalize(S.getKey());
anatofuz
parents:
diff changeset
66 dlog("Source {0} = {1}, MaxUp = {2}", Canonical, S.second.Cost,
anatofuz
parents:
diff changeset
67 S.second.MaxUpTraversals);
anatofuz
parents:
diff changeset
68 // Walk up to ancestors of this source, assigning cost.
anatofuz
parents:
diff changeset
69 llvm::StringRef Rest = Canonical;
anatofuz
parents:
diff changeset
70 llvm::hash_code Hash = llvm::hash_value(Rest);
anatofuz
parents:
diff changeset
71 for (unsigned I = 0; !Rest.empty(); ++I) {
anatofuz
parents:
diff changeset
72 Rest = parent_path(Rest, llvm::sys::path::Style::posix);
anatofuz
parents:
diff changeset
73 auto NextHash = llvm::hash_value(Rest);
anatofuz
parents:
diff changeset
74 auto &Down = DownEdges[NextHash];
anatofuz
parents:
diff changeset
75 if (!llvm::is_contained(Down, Hash))
anatofuz
parents:
diff changeset
76 Down.push_back(Hash);
anatofuz
parents:
diff changeset
77 // We can't just break after MaxUpTraversals, must still set DownEdges.
anatofuz
parents:
diff changeset
78 if (I > S.getValue().MaxUpTraversals) {
anatofuz
parents:
diff changeset
79 if (Cache.find(Hash) != Cache.end())
anatofuz
parents:
diff changeset
80 break;
anatofuz
parents:
diff changeset
81 } else {
anatofuz
parents:
diff changeset
82 unsigned Cost = S.getValue().Cost + I * Opts.UpCost;
anatofuz
parents:
diff changeset
83 auto R = Cache.try_emplace(Hash, Cost);
anatofuz
parents:
diff changeset
84 if (!R.second) {
anatofuz
parents:
diff changeset
85 if (Cost < R.first->second) {
anatofuz
parents:
diff changeset
86 R.first->second = Cost;
anatofuz
parents:
diff changeset
87 } else {
anatofuz
parents:
diff changeset
88 // If we're not the best way to get to this path, stop assigning.
anatofuz
parents:
diff changeset
89 break;
anatofuz
parents:
diff changeset
90 }
anatofuz
parents:
diff changeset
91 }
anatofuz
parents:
diff changeset
92 }
anatofuz
parents:
diff changeset
93 Hash = NextHash;
anatofuz
parents:
diff changeset
94 }
anatofuz
parents:
diff changeset
95 }
anatofuz
parents:
diff changeset
96 // Now propagate scores parent -> child if that's an improvement.
anatofuz
parents:
diff changeset
97 // BFS ensures we propagate down chains (must visit parents before children).
anatofuz
parents:
diff changeset
98 std::queue<llvm::hash_code> Next;
anatofuz
parents:
diff changeset
99 for (auto Child : DownEdges.lookup(llvm::hash_value(llvm::StringRef(""))))
anatofuz
parents:
diff changeset
100 Next.push(Child);
anatofuz
parents:
diff changeset
101 while (!Next.empty()) {
anatofuz
parents:
diff changeset
102 auto Parent = Next.front();
anatofuz
parents:
diff changeset
103 Next.pop();
anatofuz
parents:
diff changeset
104 auto ParentCost = Cache.lookup(Parent);
anatofuz
parents:
diff changeset
105 for (auto Child : DownEdges.lookup(Parent)) {
anatofuz
parents:
diff changeset
106 if (Parent != RootHash || Opts.AllowDownTraversalFromRoot) {
anatofuz
parents:
diff changeset
107 auto &ChildCost =
anatofuz
parents:
diff changeset
108 Cache.try_emplace(Child, Unreachable).first->getSecond();
anatofuz
parents:
diff changeset
109 if (ParentCost + Opts.DownCost < ChildCost)
anatofuz
parents:
diff changeset
110 ChildCost = ParentCost + Opts.DownCost;
anatofuz
parents:
diff changeset
111 }
anatofuz
parents:
diff changeset
112 Next.push(Child);
anatofuz
parents:
diff changeset
113 }
anatofuz
parents:
diff changeset
114 }
anatofuz
parents:
diff changeset
115 }
anatofuz
parents:
diff changeset
116
anatofuz
parents:
diff changeset
117 unsigned FileDistance::distance(llvm::StringRef Path) {
anatofuz
parents:
diff changeset
118 auto Canonical = canonicalize(Path);
anatofuz
parents:
diff changeset
119 unsigned Cost = Unreachable;
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
120 llvm::SmallVector<llvm::hash_code> Ancestors;
150
anatofuz
parents:
diff changeset
121 // Walk up ancestors until we find a path we know the distance for.
anatofuz
parents:
diff changeset
122 for (llvm::StringRef Rest = Canonical; !Rest.empty();
anatofuz
parents:
diff changeset
123 Rest = parent_path(Rest, llvm::sys::path::Style::posix)) {
anatofuz
parents:
diff changeset
124 auto Hash = llvm::hash_value(Rest);
anatofuz
parents:
diff changeset
125 if (Hash == RootHash && !Ancestors.empty() &&
anatofuz
parents:
diff changeset
126 !Opts.AllowDownTraversalFromRoot) {
anatofuz
parents:
diff changeset
127 Cost = Unreachable;
anatofuz
parents:
diff changeset
128 break;
anatofuz
parents:
diff changeset
129 }
anatofuz
parents:
diff changeset
130 auto It = Cache.find(Hash);
anatofuz
parents:
diff changeset
131 if (It != Cache.end()) {
anatofuz
parents:
diff changeset
132 Cost = It->second;
anatofuz
parents:
diff changeset
133 break;
anatofuz
parents:
diff changeset
134 }
anatofuz
parents:
diff changeset
135 Ancestors.push_back(Hash);
anatofuz
parents:
diff changeset
136 }
anatofuz
parents:
diff changeset
137 // Now we know the costs for (known node, queried node].
anatofuz
parents:
diff changeset
138 // Fill these in, walking down the directory tree.
anatofuz
parents:
diff changeset
139 for (llvm::hash_code Hash : llvm::reverse(Ancestors)) {
anatofuz
parents:
diff changeset
140 if (Cost != Unreachable)
anatofuz
parents:
diff changeset
141 Cost += Opts.DownCost;
anatofuz
parents:
diff changeset
142 Cache.try_emplace(Hash, Cost);
anatofuz
parents:
diff changeset
143 }
anatofuz
parents:
diff changeset
144 dlog("distance({0} = {1})", Path, Cost);
anatofuz
parents:
diff changeset
145 return Cost;
anatofuz
parents:
diff changeset
146 }
anatofuz
parents:
diff changeset
147
anatofuz
parents:
diff changeset
148 unsigned URIDistance::distance(llvm::StringRef URI) {
anatofuz
parents:
diff changeset
149 auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::Unreachable);
anatofuz
parents:
diff changeset
150 if (!R.second)
anatofuz
parents:
diff changeset
151 return R.first->getSecond();
anatofuz
parents:
diff changeset
152 if (auto U = clangd::URI::parse(URI)) {
anatofuz
parents:
diff changeset
153 dlog("distance({0} = {1})", URI, U->body());
anatofuz
parents:
diff changeset
154 R.first->second = forScheme(U->scheme()).distance(U->body());
anatofuz
parents:
diff changeset
155 } else {
anatofuz
parents:
diff changeset
156 log("URIDistance::distance() of unparseable {0}: {1}", URI, U.takeError());
anatofuz
parents:
diff changeset
157 }
anatofuz
parents:
diff changeset
158 return R.first->second;
anatofuz
parents:
diff changeset
159 }
anatofuz
parents:
diff changeset
160
anatofuz
parents:
diff changeset
161 FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) {
anatofuz
parents:
diff changeset
162 auto &Delegate = ByScheme[Scheme];
anatofuz
parents:
diff changeset
163 if (!Delegate) {
anatofuz
parents:
diff changeset
164 llvm::StringMap<SourceParams> SchemeSources;
anatofuz
parents:
diff changeset
165 for (const auto &Source : Sources) {
anatofuz
parents:
diff changeset
166 if (auto U = clangd::URI::create(Source.getKey(), Scheme))
anatofuz
parents:
diff changeset
167 SchemeSources.try_emplace(U->body(), Source.getValue());
anatofuz
parents:
diff changeset
168 else
anatofuz
parents:
diff changeset
169 llvm::consumeError(U.takeError());
anatofuz
parents:
diff changeset
170 }
anatofuz
parents:
diff changeset
171 dlog("FileDistance for scheme {0}: {1}/{2} sources", Scheme,
anatofuz
parents:
diff changeset
172 SchemeSources.size(), Sources.size());
anatofuz
parents:
diff changeset
173 Delegate.reset(new FileDistance(std::move(SchemeSources), Opts));
anatofuz
parents:
diff changeset
174 }
anatofuz
parents:
diff changeset
175 return *Delegate;
anatofuz
parents:
diff changeset
176 }
anatofuz
parents:
diff changeset
177
anatofuz
parents:
diff changeset
178 static std::pair<std::string, int> scopeToPath(llvm::StringRef Scope) {
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
179 llvm::SmallVector<llvm::StringRef> Split;
150
anatofuz
parents:
diff changeset
180 Scope.split(Split, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
anatofuz
parents:
diff changeset
181 return {"/" + llvm::join(Split, "/"), Split.size()};
anatofuz
parents:
diff changeset
182 }
anatofuz
parents:
diff changeset
183
anatofuz
parents:
diff changeset
184 static FileDistance
anatofuz
parents:
diff changeset
185 createScopeFileDistance(llvm::ArrayRef<std::string> QueryScopes) {
anatofuz
parents:
diff changeset
186 FileDistanceOptions Opts;
anatofuz
parents:
diff changeset
187 Opts.UpCost = 2;
anatofuz
parents:
diff changeset
188 Opts.DownCost = 4;
anatofuz
parents:
diff changeset
189 Opts.AllowDownTraversalFromRoot = false;
anatofuz
parents:
diff changeset
190
anatofuz
parents:
diff changeset
191 llvm::StringMap<SourceParams> Sources;
anatofuz
parents:
diff changeset
192 llvm::StringRef Preferred =
anatofuz
parents:
diff changeset
193 QueryScopes.empty() ? "" : QueryScopes.front().c_str();
anatofuz
parents:
diff changeset
194 for (llvm::StringRef S : QueryScopes) {
anatofuz
parents:
diff changeset
195 SourceParams Param;
anatofuz
parents:
diff changeset
196 // Penalize the global scope even it's preferred, as all projects can define
anatofuz
parents:
diff changeset
197 // symbols in it, and there is pattern where using-namespace is used in
anatofuz
parents:
diff changeset
198 // place of enclosing namespaces (e.g. in implementation files).
anatofuz
parents:
diff changeset
199 if (S == Preferred)
anatofuz
parents:
diff changeset
200 Param.Cost = S == "" ? 4 : 0;
anatofuz
parents:
diff changeset
201 else if (Preferred.startswith(S) && !S.empty())
anatofuz
parents:
diff changeset
202 continue; // just rely on up-traversals.
anatofuz
parents:
diff changeset
203 else
anatofuz
parents:
diff changeset
204 Param.Cost = S == "" ? 6 : 2;
anatofuz
parents:
diff changeset
205 auto Path = scopeToPath(S);
anatofuz
parents:
diff changeset
206 // The global namespace is not 'near' its children.
anatofuz
parents:
diff changeset
207 Param.MaxUpTraversals = std::max(Path.second - 1, 0);
anatofuz
parents:
diff changeset
208 Sources[Path.first] = std::move(Param);
anatofuz
parents:
diff changeset
209 }
anatofuz
parents:
diff changeset
210 return FileDistance(std::move(Sources), Opts);
anatofuz
parents:
diff changeset
211 }
anatofuz
parents:
diff changeset
212
anatofuz
parents:
diff changeset
213 ScopeDistance::ScopeDistance(llvm::ArrayRef<std::string> QueryScopes)
anatofuz
parents:
diff changeset
214 : Distance(createScopeFileDistance(QueryScopes)) {}
anatofuz
parents:
diff changeset
215
anatofuz
parents:
diff changeset
216 unsigned ScopeDistance::distance(llvm::StringRef SymbolScope) {
anatofuz
parents:
diff changeset
217 return Distance.distance(scopeToPath(SymbolScope).first);
anatofuz
parents:
diff changeset
218 }
anatofuz
parents:
diff changeset
219
anatofuz
parents:
diff changeset
220 } // namespace clangd
anatofuz
parents:
diff changeset
221 } // namespace clang