173
|
1 //===-- Benchmark memory specific tools -------------------------*- C++ -*-===//
|
150
|
2 //
|
|
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
4 // See https://llvm.org/LICENSE.txt for license information.
|
|
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
6 //
|
|
7 //===----------------------------------------------------------------------===//
|
|
8
|
|
9 // This file complements the `benchmark` header with memory specific tools and
|
|
10 // benchmarking facilities.
|
|
11
|
|
12 #ifndef LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H
|
|
13 #define LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H
|
|
14
|
|
15 #include "LibcBenchmark.h"
|
|
16 #include "llvm/ADT/DenseMap.h"
|
|
17 #include "llvm/ADT/SmallVector.h"
|
|
18 #include "llvm/Support/Alignment.h"
|
|
19 #include <cstdint>
|
|
20 #include <random>
|
|
21
|
|
22 namespace llvm {
|
|
23 namespace libc_benchmarks {
|
|
24
|
|
25 //--------------
|
|
26 // Configuration
|
|
27 //--------------
|
|
28
|
|
29 // Specifies a range of sizes to explore.
|
|
30 struct SizeRange {
|
|
31 uint32_t From = 0; // Inclusive
|
|
32 uint32_t To = 1024; // Inclusive
|
|
33 uint32_t Step = 1;
|
|
34 };
|
|
35
|
|
36 // An object to define how to test a memory function.
|
|
37 struct StudyConfiguration {
|
|
38 // The number of run for the study.
|
|
39 uint32_t Runs = 1;
|
|
40
|
|
41 // The size of the buffers (1 buffer for memset but 2 for memcpy or memcmp).
|
|
42 // When testing small sizes, it's important to keep the total allocated
|
|
43 // size under the size of the L1 cache (usually 16 or 32KiB). The framework
|
|
44 // will also use 2KiB of additional L1 memory to store the function
|
|
45 // parameters.
|
|
46 uint32_t BufferSize = 8192;
|
|
47
|
|
48 // The range of sizes to exercise.
|
|
49 SizeRange Size;
|
|
50
|
|
51 MaybeAlign AddressAlignment; // Unset : Use start of buffer which is at
|
|
52 // least cache line aligned)
|
|
53 // 1 : Use random address,
|
|
54 // >1 : Use random address aligned to value.
|
|
55
|
|
56 // The value to use for memset.
|
|
57 uint8_t MemsetValue = 0;
|
|
58
|
|
59 // The mismatch position for memcmp.
|
|
60 uint32_t MemcmpMismatchAt = 0; // 0 : Buffer compare equal,
|
|
61 // >0 : Buffer compare different at byte N-1.
|
|
62 };
|
|
63
|
|
64 //--------
|
|
65 // Results
|
|
66 //--------
|
|
67
|
|
68 // The time to run one iteration of the function under test for the specified
|
|
69 // Size.
|
|
70 struct Measurement {
|
|
71 uint32_t Size = 0;
|
|
72 Duration Runtime = {};
|
|
73 };
|
|
74
|
|
75 // The measurements for a specific function.
|
|
76 struct FunctionMeasurements {
|
|
77 std::string Name;
|
|
78 std::vector<Measurement> Measurements;
|
|
79 };
|
|
80
|
|
81 // The root object containing all the data (configuration and measurements).
|
|
82 struct Study {
|
|
83 HostState Host;
|
|
84 BenchmarkOptions Options;
|
|
85 StudyConfiguration Configuration;
|
|
86 SmallVector<FunctionMeasurements, 4> Functions;
|
|
87 };
|
|
88
|
|
89 // Provides an aligned, dynamically allocated buffer.
|
|
90 class AlignedBuffer {
|
|
91 char *const Buffer = nullptr;
|
|
92 size_t Size = 0;
|
|
93
|
|
94 public:
|
|
95 static constexpr size_t Alignment = 1024;
|
|
96
|
|
97 explicit AlignedBuffer(size_t Size)
|
|
98 : Buffer(static_cast<char *>(aligned_alloc(1024, Size))), Size(Size) {}
|
|
99 ~AlignedBuffer() { free(Buffer); }
|
|
100
|
|
101 inline char *operator+(size_t Index) { return Buffer + Index; }
|
|
102 inline const char *operator+(size_t Index) const { return Buffer + Index; }
|
|
103 inline char &operator[](size_t Index) { return Buffer[Index]; }
|
|
104 inline const char &operator[](size_t Index) const { return Buffer[Index]; }
|
|
105 inline char *begin() { return Buffer; }
|
|
106 inline char *end() { return Buffer + Size; }
|
|
107 };
|
|
108
|
|
109 // Implements the ParameterProvider abstraction needed by the `benchmark`
|
|
110 // function. This implementation makes sure that all parameters will fit into
|
|
111 // `StorageSize` bytes. The total memory accessed during benchmark should be
|
|
112 // less than the data L1 cache, that is the storage for the ParameterProvider
|
|
113 // and the memory buffers.
|
|
114 template <typename Context, size_t StorageSize = 8 * 1024>
|
|
115 class SmallParameterProvider {
|
|
116 using ParameterType = typename Context::ParameterType;
|
|
117 ByteConstrainedArray<ParameterType, StorageSize> Parameters;
|
|
118 size_t LastIterations;
|
|
119 Context &Ctx;
|
|
120
|
|
121 public:
|
|
122 explicit SmallParameterProvider(Context &C) : Ctx(C) {}
|
|
123 SmallParameterProvider(const SmallParameterProvider &) = delete;
|
|
124 SmallParameterProvider &operator=(const SmallParameterProvider &) = delete;
|
|
125
|
|
126 // Useful to compute the histogram of the size parameter.
|
|
127 CircularArrayRef<ParameterType> getLastBatch() const {
|
|
128 return cycle(Parameters, LastIterations);
|
|
129 }
|
|
130
|
|
131 // Implements the interface needed by the `benchmark` function.
|
|
132 CircularArrayRef<ParameterType> generateBatch(size_t Iterations) {
|
|
133 LastIterations = Iterations;
|
|
134 Ctx.Randomize(Parameters);
|
|
135 return getLastBatch();
|
|
136 }
|
|
137 };
|
|
138
|
|
139 // Helper to generate random buffer offsets that satisfy the configuration
|
|
140 // constraints.
|
|
141 class OffsetDistribution {
|
|
142 std::uniform_int_distribution<uint32_t> Distribution;
|
|
143 uint32_t Factor;
|
|
144
|
|
145 public:
|
|
146 explicit OffsetDistribution(const StudyConfiguration &Conf);
|
|
147
|
|
148 template <class Generator> uint32_t operator()(Generator &G) {
|
|
149 return Distribution(G) * Factor;
|
|
150 }
|
|
151 };
|
|
152
|
|
153 // Helper to generate random buffer offsets that satisfy the configuration
|
|
154 // constraints. It is specifically designed to benchmark `memcmp` functions
|
|
155 // where we may want the Nth byte to differ.
|
|
156 class MismatchOffsetDistribution {
|
|
157 std::uniform_int_distribution<size_t> MismatchIndexSelector;
|
|
158 llvm::SmallVector<uint32_t, 16> MismatchIndices;
|
|
159 const uint32_t MismatchAt;
|
|
160
|
|
161 public:
|
|
162 explicit MismatchOffsetDistribution(const StudyConfiguration &Conf);
|
|
163
|
|
164 explicit operator bool() const { return !MismatchIndices.empty(); }
|
|
165
|
|
166 const llvm::SmallVectorImpl<uint32_t> &getMismatchIndices() const {
|
|
167 return MismatchIndices;
|
|
168 }
|
|
169
|
|
170 template <class Generator> uint32_t operator()(Generator &G, uint32_t Size) {
|
|
171 const uint32_t MismatchIndex = MismatchIndices[MismatchIndexSelector(G)];
|
|
172 // We need to position the offset so that a mismatch occurs at MismatchAt.
|
|
173 if (Size >= MismatchAt)
|
|
174 return MismatchIndex - MismatchAt;
|
|
175 // Size is too small to trigger the mismatch.
|
|
176 return MismatchIndex - Size - 1;
|
|
177 }
|
|
178 };
|
|
179
|
|
180 } // namespace libc_benchmarks
|
|
181 } // namespace llvm
|
|
182
|
|
183 #endif // LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H
|