121
|
1 //===- ReservoirSampler.cpp - Tests for the ReservoirSampler --------------===//
|
|
2 //
|
|
3 // The LLVM Compiler Infrastructure
|
|
4 //
|
|
5 // This file is distributed under the University of Illinois Open Source
|
|
6 // License. See LICENSE.TXT for details.
|
|
7 //
|
|
8 //===----------------------------------------------------------------------===//
|
|
9
|
|
10 #include "llvm/FuzzMutate/Random.h"
|
|
11 #include "gtest/gtest.h"
|
|
12 #include <random>
|
|
13
|
|
14 using namespace llvm;
|
|
15
|
|
16 TEST(ReservoirSamplerTest, OneItem) {
|
|
17 std::mt19937 Rand;
|
|
18 auto Sampler = makeSampler(Rand, 7, 1);
|
|
19 ASSERT_FALSE(Sampler.isEmpty());
|
|
20 ASSERT_EQ(7, Sampler.getSelection());
|
|
21 }
|
|
22
|
|
23 TEST(ReservoirSamplerTest, NoWeight) {
|
|
24 std::mt19937 Rand;
|
|
25 auto Sampler = makeSampler(Rand, 7, 0);
|
|
26 ASSERT_TRUE(Sampler.isEmpty());
|
|
27 }
|
|
28
|
|
29 TEST(ReservoirSamplerTest, Uniform) {
|
|
30 std::mt19937 Rand;
|
|
31
|
|
32 // Run three chi-squared tests to check that the distribution is reasonably
|
|
33 // uniform.
|
|
34 std::vector<int> Items = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
|
|
35
|
|
36 int Failures = 0;
|
|
37 for (int Run = 0; Run < 3; ++Run) {
|
|
38 std::vector<int> Counts(Items.size(), 0);
|
|
39
|
|
40 // We need $np_s > 5$ at minimum, but we're better off going a couple of
|
|
41 // orders of magnitude larger.
|
|
42 int N = Items.size() * 5 * 100;
|
|
43 for (int I = 0; I < N; ++I) {
|
|
44 auto Sampler = makeSampler(Rand, Items);
|
|
45 Counts[Sampler.getSelection()] += 1;
|
|
46 }
|
|
47
|
|
48 // Knuth. TAOCP Vol. 2, 3.3.1 (8):
|
|
49 // $V = \frac{1}{n} \sum_{s=1}^{k} \left(\frac{Y_s^2}{p_s}\right) - n$
|
|
50 double Ps = 1.0 / Items.size();
|
|
51 double Sum = 0.0;
|
|
52 for (int Ys : Counts)
|
|
53 Sum += Ys * Ys / Ps;
|
|
54 double V = (Sum / N) - N;
|
|
55
|
|
56 assert(Items.size() == 10 && "Our chi-squared values assume 10 items");
|
|
57 // Since we have 10 items, there are 9 degrees of freedom and the table of
|
|
58 // chi-squared values is as follows:
|
|
59 //
|
|
60 // | p=1% | 5% | 25% | 50% | 75% | 95% | 99% |
|
|
61 // v=9 | 2.088 | 3.325 | 5.899 | 8.343 | 11.39 | 16.92 | 21.67 |
|
|
62 //
|
|
63 // Check that we're in the likely range of results.
|
|
64 //if (V < 2.088 || V > 21.67)
|
|
65 if (V < 2.088 || V > 21.67)
|
|
66 ++Failures;
|
|
67 }
|
|
68 EXPECT_LT(Failures, 3) << "Non-uniform distribution?";
|
|
69 }
|