150
|
1 //===-- SerializationTests.cpp - Binary and YAML serialization unit tests -===//
|
|
2 //
|
|
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
4 // See https://llvm.org/LICENSE.txt for license information.
|
|
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
6 //
|
|
7 //===----------------------------------------------------------------------===//
|
|
8
|
|
9 #include "Headers.h"
|
221
|
10 #include "RIFF.h"
|
150
|
11 #include "index/Index.h"
|
|
12 #include "index/Serialization.h"
|
221
|
13 #include "support/Logger.h"
|
150
|
14 #include "clang/Tooling/CompilationDatabase.h"
|
221
|
15 #include "llvm/ADT/ScopeExit.h"
|
|
16 #include "llvm/ADT/StringExtras.h"
|
|
17 #include "llvm/Support/Compression.h"
|
|
18 #include "llvm/Support/Error.h"
|
150
|
19 #include "llvm/Support/ScopedPrinter.h"
|
|
20 #include "gmock/gmock.h"
|
|
21 #include "gtest/gtest.h"
|
221
|
22 #ifdef LLVM_ON_UNIX
|
|
23 #include <sys/resource.h>
|
|
24 #endif
|
150
|
25
|
173
|
26 using ::testing::ElementsAre;
|
150
|
27 using ::testing::Pair;
|
|
28 using ::testing::UnorderedElementsAre;
|
|
29 using ::testing::UnorderedElementsAreArray;
|
|
30
|
|
31 namespace clang {
|
|
32 namespace clangd {
|
|
33 namespace {
|
|
34
|
|
// YAML fixture shared by the tests in this file. It contains two symbols
// (Foo1 and Foo2), one reference to Foo1, one relation, one compile command
// and one source node.
// The text is a runtime string: YAML derives nesting from indentation, so the
// child keys below must stay indented under their parent key.
const char *YAML = R"(
---
!Symbol
ID: 057557CEBF6E6B2D
Name: 'Foo1'
Scope: 'clang::'
SymInfo:
  Kind: Function
  Lang: Cpp
CanonicalDeclaration:
  FileURI: file:///path/foo.h
  Start:
    Line: 1
    Column: 0
  End:
    Line: 1
    Column: 1
Origin: 128
Flags: 129
Documentation: 'Foo doc'
ReturnType: 'int'
IncludeHeaders:
  - Header: 'include1'
    References: 7
  - Header: 'include2'
    References: 3
...
---
!Symbol
ID: 057557CEBF6E6B2E
Name: 'Foo2'
Scope: 'clang::'
SymInfo:
  Kind: Function
  Lang: Cpp
CanonicalDeclaration:
  FileURI: file:///path/bar.h
  Start:
    Line: 1
    Column: 0
  End:
    Line: 1
    Column: 1
Flags: 2
Signature: '-sig'
CompletionSnippetSuffix: '-snippet'
...
!Refs
ID: 057557CEBF6E6B2D
References:
  - Kind: 4
    Location:
      FileURI: file:///path/foo.cc
      Start:
        Line: 5
        Column: 3
      End:
        Line: 5
        Column: 8
...
--- !Relations
Subject:
  ID: 6481EE7AF2841756
Predicate: 0
Object:
  ID: 6512AEC512EA3A2D
...
--- !Cmd
Directory: 'testdir'
CommandLine:
  - 'cmd1'
  - 'cmd2'
...
--- !Source
URI: 'file:///path/source1.cpp'
Flags: 1
Digest: EED8F5EAF25C453C
DirectIncludes:
  - 'file:///path/inc1.h'
  - 'file:///path/inc2.h'
...
)";
|
|
117
|
|
118 MATCHER_P(ID, I, "") { return arg.ID == cantFail(SymbolID::fromStr(I)); }
|
|
119 MATCHER_P(QName, Name, "") { return (arg.Scope + arg.Name).str() == Name; }
|
|
120 MATCHER_P2(IncludeHeaderWithRef, IncludeHeader, References, "") {
|
|
121 return (arg.IncludeHeader == IncludeHeader) && (arg.References == References);
|
|
122 }
|
|
123
|
|
124 TEST(SerializationTest, NoCrashOnEmptyYAML) {
|
|
125 EXPECT_TRUE(bool(readIndexFile("")));
|
|
126 }
|
|
127
|
|
128 TEST(SerializationTest, YAMLConversions) {
|
|
129 auto ParsedYAML = readIndexFile(YAML);
|
|
130 ASSERT_TRUE(bool(ParsedYAML)) << ParsedYAML.takeError();
|
|
131 ASSERT_TRUE(bool(ParsedYAML->Symbols));
|
|
132 EXPECT_THAT(
|
|
133 *ParsedYAML->Symbols,
|
|
134 UnorderedElementsAre(ID("057557CEBF6E6B2D"), ID("057557CEBF6E6B2E")));
|
|
135
|
|
136 auto Sym1 = *ParsedYAML->Symbols->find(
|
|
137 cantFail(SymbolID::fromStr("057557CEBF6E6B2D")));
|
|
138 auto Sym2 = *ParsedYAML->Symbols->find(
|
|
139 cantFail(SymbolID::fromStr("057557CEBF6E6B2E")));
|
|
140
|
|
141 EXPECT_THAT(Sym1, QName("clang::Foo1"));
|
|
142 EXPECT_EQ(Sym1.Signature, "");
|
|
143 EXPECT_EQ(Sym1.Documentation, "Foo doc");
|
|
144 EXPECT_EQ(Sym1.ReturnType, "int");
|
|
145 EXPECT_EQ(StringRef(Sym1.CanonicalDeclaration.FileURI), "file:///path/foo.h");
|
|
146 EXPECT_EQ(Sym1.Origin, static_cast<SymbolOrigin>(1 << 7));
|
|
147 EXPECT_EQ(static_cast<uint8_t>(Sym1.Flags), 129);
|
|
148 EXPECT_TRUE(Sym1.Flags & Symbol::IndexedForCodeCompletion);
|
|
149 EXPECT_FALSE(Sym1.Flags & Symbol::Deprecated);
|
|
150 EXPECT_THAT(Sym1.IncludeHeaders,
|
|
151 UnorderedElementsAre(IncludeHeaderWithRef("include1", 7u),
|
|
152 IncludeHeaderWithRef("include2", 3u)));
|
|
153
|
|
154 EXPECT_THAT(Sym2, QName("clang::Foo2"));
|
|
155 EXPECT_EQ(Sym2.Signature, "-sig");
|
|
156 EXPECT_EQ(Sym2.ReturnType, "");
|
|
157 EXPECT_EQ(llvm::StringRef(Sym2.CanonicalDeclaration.FileURI),
|
|
158 "file:///path/bar.h");
|
|
159 EXPECT_FALSE(Sym2.Flags & Symbol::IndexedForCodeCompletion);
|
|
160 EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated);
|
|
161
|
|
162 ASSERT_TRUE(bool(ParsedYAML->Refs));
|
|
163 EXPECT_THAT(
|
|
164 *ParsedYAML->Refs,
|
|
165 UnorderedElementsAre(Pair(cantFail(SymbolID::fromStr("057557CEBF6E6B2D")),
|
|
166 ::testing::SizeIs(1))));
|
|
167 auto Ref1 = ParsedYAML->Refs->begin()->second.front();
|
|
168 EXPECT_EQ(Ref1.Kind, RefKind::Reference);
|
|
169 EXPECT_EQ(StringRef(Ref1.Location.FileURI), "file:///path/foo.cc");
|
|
170
|
|
171 SymbolID Base = cantFail(SymbolID::fromStr("6481EE7AF2841756"));
|
|
172 SymbolID Derived = cantFail(SymbolID::fromStr("6512AEC512EA3A2D"));
|
|
173 ASSERT_TRUE(bool(ParsedYAML->Relations));
|
|
174 EXPECT_THAT(
|
|
175 *ParsedYAML->Relations,
|
|
176 UnorderedElementsAre(Relation{Base, RelationKind::BaseOf, Derived}));
|
173
|
177
|
|
178 ASSERT_TRUE(bool(ParsedYAML->Cmd));
|
|
179 auto &Cmd = *ParsedYAML->Cmd;
|
|
180 ASSERT_EQ(Cmd.Directory, "testdir");
|
|
181 EXPECT_THAT(Cmd.CommandLine, ElementsAre("cmd1", "cmd2"));
|
|
182
|
|
183 ASSERT_TRUE(bool(ParsedYAML->Sources));
|
|
184 const auto *URI = "file:///path/source1.cpp";
|
|
185 ASSERT_TRUE(ParsedYAML->Sources->count(URI));
|
|
186 auto IGNDeserialized = ParsedYAML->Sources->lookup(URI);
|
|
187 EXPECT_EQ(llvm::toHex(IGNDeserialized.Digest), "EED8F5EAF25C453C");
|
|
188 EXPECT_THAT(IGNDeserialized.DirectIncludes,
|
|
189 ElementsAre("file:///path/inc1.h", "file:///path/inc2.h"));
|
|
190 EXPECT_EQ(IGNDeserialized.URI, URI);
|
|
191 EXPECT_EQ(IGNDeserialized.Flags, IncludeGraphNode::SourceFlag(1));
|
150
|
192 }
|
|
193
|
|
194 std::vector<std::string> YAMLFromSymbols(const SymbolSlab &Slab) {
|
|
195 std::vector<std::string> Result;
|
|
196 for (const auto &Sym : Slab)
|
|
197 Result.push_back(toYAML(Sym));
|
|
198 return Result;
|
|
199 }
|
|
200 std::vector<std::string> YAMLFromRefs(const RefSlab &Slab) {
|
|
201 std::vector<std::string> Result;
|
|
202 for (const auto &Refs : Slab)
|
|
203 Result.push_back(toYAML(Refs));
|
|
204 return Result;
|
|
205 }
|
|
206
|
|
207 std::vector<std::string> YAMLFromRelations(const RelationSlab &Slab) {
|
|
208 std::vector<std::string> Result;
|
|
209 for (const auto &Rel : Slab)
|
|
210 Result.push_back(toYAML(Rel));
|
|
211 return Result;
|
|
212 }
|
|
213
|
|
214 TEST(SerializationTest, BinaryConversions) {
|
|
215 auto In = readIndexFile(YAML);
|
|
216 EXPECT_TRUE(bool(In)) << In.takeError();
|
|
217
|
|
218 // Write to binary format, and parse again.
|
|
219 IndexFileOut Out(*In);
|
|
220 Out.Format = IndexFileFormat::RIFF;
|
|
221 std::string Serialized = llvm::to_string(Out);
|
|
222
|
|
223 auto In2 = readIndexFile(Serialized);
|
|
224 ASSERT_TRUE(bool(In2)) << In.takeError();
|
|
225 ASSERT_TRUE(In2->Symbols);
|
|
226 ASSERT_TRUE(In2->Refs);
|
|
227 ASSERT_TRUE(In2->Relations);
|
|
228
|
|
229 // Assert the YAML serializations match, for nice comparisons and diffs.
|
|
230 EXPECT_THAT(YAMLFromSymbols(*In2->Symbols),
|
|
231 UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
|
|
232 EXPECT_THAT(YAMLFromRefs(*In2->Refs),
|
|
233 UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
|
|
234 EXPECT_THAT(YAMLFromRelations(*In2->Relations),
|
|
235 UnorderedElementsAreArray(YAMLFromRelations(*In->Relations)));
|
|
236 }
|
|
237
|
|
238 TEST(SerializationTest, SrcsTest) {
|
|
239 auto In = readIndexFile(YAML);
|
|
240 EXPECT_TRUE(bool(In)) << In.takeError();
|
|
241
|
|
242 std::string TestContent("TestContent");
|
|
243 IncludeGraphNode IGN;
|
|
244 IGN.Digest = digest(TestContent);
|
|
245 IGN.DirectIncludes = {"inc1", "inc2"};
|
|
246 IGN.URI = "URI";
|
|
247 IGN.Flags |= IncludeGraphNode::SourceFlag::IsTU;
|
|
248 IGN.Flags |= IncludeGraphNode::SourceFlag::HadErrors;
|
|
249 IncludeGraph Sources;
|
|
250 Sources[IGN.URI] = IGN;
|
|
251 // Write to binary format, and parse again.
|
|
252 IndexFileOut Out(*In);
|
|
253 Out.Format = IndexFileFormat::RIFF;
|
|
254 Out.Sources = &Sources;
|
|
255 {
|
|
256 std::string Serialized = llvm::to_string(Out);
|
|
257
|
|
258 auto In = readIndexFile(Serialized);
|
|
259 ASSERT_TRUE(bool(In)) << In.takeError();
|
|
260 ASSERT_TRUE(In->Symbols);
|
|
261 ASSERT_TRUE(In->Refs);
|
|
262 ASSERT_TRUE(In->Sources);
|
|
263 ASSERT_TRUE(In->Sources->count(IGN.URI));
|
|
264 // Assert the YAML serializations match, for nice comparisons and diffs.
|
|
265 EXPECT_THAT(YAMLFromSymbols(*In->Symbols),
|
|
266 UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
|
|
267 EXPECT_THAT(YAMLFromRefs(*In->Refs),
|
|
268 UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
|
|
269 auto IGNDeserialized = In->Sources->lookup(IGN.URI);
|
|
270 EXPECT_EQ(IGNDeserialized.Digest, IGN.Digest);
|
|
271 EXPECT_EQ(IGNDeserialized.DirectIncludes, IGN.DirectIncludes);
|
|
272 EXPECT_EQ(IGNDeserialized.URI, IGN.URI);
|
|
273 EXPECT_EQ(IGNDeserialized.Flags, IGN.Flags);
|
|
274 }
|
|
275 }
|
|
276
|
|
277 TEST(SerializationTest, CmdlTest) {
|
|
278 auto In = readIndexFile(YAML);
|
|
279 EXPECT_TRUE(bool(In)) << In.takeError();
|
|
280
|
|
281 tooling::CompileCommand Cmd;
|
|
282 Cmd.Directory = "testdir";
|
|
283 Cmd.CommandLine.push_back("cmd1");
|
|
284 Cmd.CommandLine.push_back("cmd2");
|
|
285 Cmd.Filename = "ignored";
|
|
286 Cmd.Heuristic = "ignored";
|
|
287 Cmd.Output = "ignored";
|
|
288
|
|
289 IndexFileOut Out(*In);
|
|
290 Out.Format = IndexFileFormat::RIFF;
|
|
291 Out.Cmd = &Cmd;
|
|
292 {
|
|
293 std::string Serialized = llvm::to_string(Out);
|
|
294
|
|
295 auto In = readIndexFile(Serialized);
|
|
296 ASSERT_TRUE(bool(In)) << In.takeError();
|
|
297 ASSERT_TRUE(In->Cmd);
|
|
298
|
|
299 const tooling::CompileCommand &SerializedCmd = In->Cmd.getValue();
|
|
300 EXPECT_EQ(SerializedCmd.CommandLine, Cmd.CommandLine);
|
|
301 EXPECT_EQ(SerializedCmd.Directory, Cmd.Directory);
|
|
302 EXPECT_NE(SerializedCmd.Filename, Cmd.Filename);
|
|
303 EXPECT_NE(SerializedCmd.Heuristic, Cmd.Heuristic);
|
|
304 EXPECT_NE(SerializedCmd.Output, Cmd.Output);
|
|
305 }
|
|
306 }
|
221
|
307
|
|
308 // rlimit is part of POSIX.
|
|
309 // ASan uses a lot of address space, so we can't apply strict limits.
|
|
310 #if LLVM_ON_UNIX && !LLVM_ADDRESS_SANITIZER_BUILD
|
|
311 class ScopedMemoryLimit {
|
|
312 struct rlimit OriginalLimit;
|
|
313 bool Succeeded = false;
|
|
314
|
|
315 public:
|
|
316 ScopedMemoryLimit(rlim_t Bytes) {
|
|
317 if (!getrlimit(RLIMIT_AS, &OriginalLimit)) {
|
|
318 struct rlimit NewLimit = OriginalLimit;
|
|
319 NewLimit.rlim_cur = Bytes;
|
|
320 Succeeded = !setrlimit(RLIMIT_AS, &NewLimit);
|
|
321 }
|
|
322 if (!Succeeded)
|
|
323 log("Failed to set rlimit");
|
|
324 }
|
|
325
|
|
326 ~ScopedMemoryLimit() {
|
|
327 if (Succeeded)
|
|
328 setrlimit(RLIMIT_AS, &OriginalLimit);
|
|
329 }
|
|
330 };
|
|
331 #else
|
|
332 class ScopedMemoryLimit {
|
|
333 public:
|
|
334 ScopedMemoryLimit(unsigned Bytes) { log("rlimit unsupported"); }
|
|
335 };
|
|
336 #endif
|
|
337
|
|
338 // Test that our deserialization detects invalid array sizes without allocating.
|
|
339 // If this detection fails, the test should allocate a huge array and crash.
|
|
340 TEST(SerializationTest, NoCrashOnBadArraySize) {
|
|
341 // This test is tricky because we need to construct a subtly invalid file.
|
|
342 // First, create a valid serialized file.
|
|
343 auto In = readIndexFile(YAML);
|
|
344 ASSERT_FALSE(!In) << In.takeError();
|
|
345 IndexFileOut Out(*In);
|
|
346 Out.Format = IndexFileFormat::RIFF;
|
|
347 std::string Serialized = llvm::to_string(Out);
|
|
348
|
|
349 // Low-level parse it again and find the `srcs` chunk we're going to corrupt.
|
|
350 auto Parsed = riff::readFile(Serialized);
|
|
351 ASSERT_FALSE(!Parsed) << Parsed.takeError();
|
|
352 auto Srcs = llvm::find_if(Parsed->Chunks, [](riff::Chunk C) {
|
|
353 return C.ID == riff::fourCC("srcs");
|
|
354 });
|
|
355 ASSERT_NE(Srcs, Parsed->Chunks.end());
|
|
356
|
|
357 // Srcs consists of a sequence of IncludeGraphNodes. In our case, just one.
|
|
358 // The node has:
|
|
359 // - 1 byte: flags (1)
|
|
360 // - varint(stringID): URI
|
|
361 // - 8 byte: file digest
|
|
362 // - varint: DirectIncludes.length
|
|
363 // - repeated varint(stringID): DirectIncludes
|
|
364 // We want to set DirectIncludes.length to a huge number.
|
|
365 // The offset isn't trivial to find, so we use the file digest.
|
|
366 std::string FileDigest = llvm::fromHex("EED8F5EAF25C453C");
|
|
367 unsigned Pos = Srcs->Data.find_first_of(FileDigest);
|
|
368 ASSERT_NE(Pos, StringRef::npos) << "Couldn't locate file digest";
|
|
369 Pos += FileDigest.size();
|
|
370
|
|
371 // Varints are little-endian base-128 numbers, where the top-bit of each byte
|
|
372 // indicates whether there are more. ffffffff0f -> 0xffffffff.
|
|
373 std::string CorruptSrcs =
|
|
374 (Srcs->Data.take_front(Pos) + llvm::fromHex("ffffffff0f") +
|
|
375 "some_random_garbage")
|
|
376 .str();
|
|
377 Srcs->Data = CorruptSrcs;
|
|
378
|
|
379 // Try to crash rather than hang on large allocation.
|
|
380 ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB
|
|
381
|
|
382 std::string CorruptFile = llvm::to_string(*Parsed);
|
|
383 auto CorruptParsed = readIndexFile(CorruptFile);
|
|
384 ASSERT_TRUE(!CorruptParsed);
|
|
385 EXPECT_EQ(llvm::toString(CorruptParsed.takeError()),
|
|
386 "malformed or truncated include uri");
|
|
387 }
|
|
388
|
|
389 // Check we detect invalid string table size size without allocating it first.
|
|
390 // If this detection fails, the test should allocate a huge array and crash.
|
|
391 TEST(SerializationTest, NoCrashOnBadStringTableSize) {
|
|
392 if (!llvm::zlib::isAvailable()) {
|
|
393 log("skipping test, no zlib");
|
|
394 return;
|
|
395 }
|
|
396
|
|
397 // First, create a valid serialized file.
|
|
398 auto In = readIndexFile(YAML);
|
|
399 ASSERT_FALSE(!In) << In.takeError();
|
|
400 IndexFileOut Out(*In);
|
|
401 Out.Format = IndexFileFormat::RIFF;
|
|
402 std::string Serialized = llvm::to_string(Out);
|
|
403
|
|
404 // Low-level parse it again, we're going to replace the `stri` chunk.
|
|
405 auto Parsed = riff::readFile(Serialized);
|
|
406 ASSERT_FALSE(!Parsed) << Parsed.takeError();
|
|
407 auto Stri = llvm::find_if(Parsed->Chunks, [](riff::Chunk C) {
|
|
408 return C.ID == riff::fourCC("stri");
|
|
409 });
|
|
410 ASSERT_NE(Stri, Parsed->Chunks.end());
|
|
411
|
|
412 // stri consists of an 8 byte uncompressed-size, and then compressed data.
|
|
413 // We'll claim our small amount of data expands to 4GB
|
|
414 std::string CorruptStri =
|
|
415 (llvm::fromHex("ffffffff") + Stri->Data.drop_front(4)).str();
|
|
416 Stri->Data = CorruptStri;
|
|
417 std::string FileDigest = llvm::fromHex("EED8F5EAF25C453C");
|
|
418
|
|
419 // Try to crash rather than hang on large allocation.
|
|
420 ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB
|
|
421
|
|
422 std::string CorruptFile = llvm::to_string(*Parsed);
|
|
423 auto CorruptParsed = readIndexFile(CorruptFile);
|
|
424 ASSERT_TRUE(!CorruptParsed);
|
|
425 EXPECT_THAT(llvm::toString(CorruptParsed.takeError()),
|
|
426 testing::HasSubstr("bytes is implausible"));
|
|
427 }
|
|
428
|
150
|
429 } // namespace
|
|
430 } // namespace clangd
|
|
431 } // namespace clang
|