1// Copyright 2015 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Support for registering benchmarks for functions.
16
17/* Example usage:
18// Define a function that executes the code to be measured a
19// specified number of times:
20static void BM_StringCreation(benchmark::State& state) {
21 for (auto _ : state)
22 std::string empty_string;
23}
24
25// Register the function as a benchmark
26BENCHMARK(BM_StringCreation);
27
28// Define another benchmark
29static void BM_StringCopy(benchmark::State& state) {
30 std::string x = "hello";
31 for (auto _ : state)
32 std::string copy(x);
33}
34BENCHMARK(BM_StringCopy);
35
36// Augment the main() program to invoke benchmarks if specified
37// via the --benchmark_filter command line flag. E.g.,
38// my_unittest --benchmark_filter=all
39// my_unittest --benchmark_filter=BM_StringCreation
40// my_unittest --benchmark_filter=String
41// my_unittest --benchmark_filter='Copy|Creation'
42int main(int argc, char** argv) {
43 benchmark::Initialize(&argc, argv);
44 benchmark::RunSpecifiedBenchmarks();
45 benchmark::Shutdown();
46 return 0;
47}
48
49// Sometimes a family of microbenchmarks can be implemented with
50// just one routine that takes an extra argument to specify which
51// one of the family of benchmarks to run. For example, the following
52// code defines a family of microbenchmarks for measuring the speed
53// of memcpy() calls of different lengths:
54
55static void BM_memcpy(benchmark::State& state) {
56 char* src = new char[state.range(0)]; char* dst = new char[state.range(0)];
57 memset(src, 'x', state.range(0));
58 for (auto _ : state)
59 memcpy(dst, src, state.range(0));
60 state.SetBytesProcessed(state.iterations() * state.range(0));
61 delete[] src; delete[] dst;
62}
63BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
64
65// The preceding code is quite repetitive, and can be replaced with the
66// following short-hand. The following invocation will pick a few
67// appropriate arguments in the specified range and will generate a
68// microbenchmark for each such argument.
69BENCHMARK(BM_memcpy)->Range(8, 8<<10);
70
71// You might have a microbenchmark that depends on two inputs. For
72// example, the following code defines a family of microbenchmarks for
73// measuring the speed of set insertion.
74static void BM_SetInsert(benchmark::State& state) {
75 set<int> data;
76 for (auto _ : state) {
77 state.PauseTiming();
78 data = ConstructRandomSet(state.range(0));
79 state.ResumeTiming();
80 for (int j = 0; j < state.range(1); ++j)
81 data.insert(RandomNumber());
82 }
83}
84BENCHMARK(BM_SetInsert)
85 ->Args({1<<10, 128})
86 ->Args({2<<10, 128})
87 ->Args({4<<10, 128})
88 ->Args({8<<10, 128})
89 ->Args({1<<10, 512})
90 ->Args({2<<10, 512})
91 ->Args({4<<10, 512})
92 ->Args({8<<10, 512});
93
94// The preceding code is quite repetitive, and can be replaced with
95// the following short-hand. The following macro will pick a few
96// appropriate arguments in the product of the two specified ranges
97// and will generate a microbenchmark for each such pair.
98BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}});
99
100// For more complex patterns of inputs, passing a custom function
101// to Apply allows programmatic specification of an
102// arbitrary set of arguments to run the microbenchmark on.
103// The following example enumerates a dense range on
104// one parameter, and a sparse range on the second.
105static void CustomArguments(benchmark::internal::Benchmark* b) {
106 for (int i = 0; i <= 10; ++i)
107 for (int j = 32; j <= 1024*1024; j *= 8)
108 b->Args({i, j});
109}
110BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
111
112// Templated microbenchmarks work the same way:
113// Produce then consume 'size' messages 'iters' times
114// Measures throughput in the absence of multiprogramming.
115template <class Q> int BM_Sequential(benchmark::State& state) {
116 Q q;
117 typename Q::value_type v;
118 for (auto _ : state) {
119 for (int i = state.range(0); i--; )
120 q.push(v);
121 for (int e = state.range(0); e--; )
122 q.Wait(&v);
123 }
124 // actually messages, not bytes:
125 state.SetBytesProcessed(state.iterations() * state.range(0));
126}
127BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
128
129Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
130benchmark. This option overrides the `benchmark_min_time` flag.
131
132void BM_test(benchmark::State& state) {
133 ... body ...
134}
135BENCHMARK(BM_test)->MinTime(2.0); // Run for at least 2 seconds.
136
137In a multithreaded test, it is guaranteed that none of the threads will start
138until all have reached the loop start, and all will have finished before any
139thread exits the loop body. As such, any global setup or teardown you want to
140do can be wrapped in a check against the thread index:
141
142static void BM_MultiThreaded(benchmark::State& state) {
143 if (state.thread_index() == 0) {
144 // Setup code here.
145 }
146 for (auto _ : state) {
147 // Run the test as normal.
148 }
149 if (state.thread_index() == 0) {
150 // Teardown code here.
151 }
152}
153BENCHMARK(BM_MultiThreaded)->Threads(4);
154
155
If a benchmark runs for only a few milliseconds it may be hard to visually
compare the measured times, since the output data is reported in nanoseconds
by default. To use a different time unit, set it explicitly:
159
160BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
161*/
162
163#ifndef BENCHMARK_BENCHMARK_H_
164#define BENCHMARK_BENCHMARK_H_
165
166// The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer.
167#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
168#define BENCHMARK_HAS_CXX11
169#endif
170
171// This _MSC_VER check should detect VS 2017 v15.3 and newer.
172#if __cplusplus >= 201703L || \
173 (defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L)
174#define BENCHMARK_HAS_CXX17
175#endif
176
177#include <stdint.h>
178
179#include <algorithm>
180#include <cassert>
181#include <cstddef>
182#include <iosfwd>
183#include <limits>
184#include <map>
185#include <set>
186#include <string>
187#include <utility>
188#include <vector>
189
190#if defined(BENCHMARK_HAS_CXX11)
191#include <atomic>
192#include <initializer_list>
193#include <type_traits>
194#include <utility>
195#endif
196
197#if defined(_MSC_VER)
198#include <intrin.h> // for _ReadWriteBarrier
199#endif
200
201#ifndef BENCHMARK_HAS_CXX11
202#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
203 TypeName(const TypeName&); \
204 TypeName& operator=(const TypeName&)
205#else
206#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
207 TypeName(const TypeName&) = delete; \
208 TypeName& operator=(const TypeName&) = delete
209#endif
210
211#ifdef BENCHMARK_HAS_CXX17
212#define BENCHMARK_UNUSED [[maybe_unused]]
213#elif defined(__GNUC__) || defined(__clang__)
214#define BENCHMARK_UNUSED __attribute__((unused))
215#else
216#define BENCHMARK_UNUSED
217#endif
218
219#if defined(__GNUC__) || defined(__clang__)
220#define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
221#define BENCHMARK_NOEXCEPT noexcept
222#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
223#elif defined(_MSC_VER) && !defined(__clang__)
224#define BENCHMARK_ALWAYS_INLINE __forceinline
225#if _MSC_VER >= 1900
226#define BENCHMARK_NOEXCEPT noexcept
227#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
228#else
229#define BENCHMARK_NOEXCEPT
230#define BENCHMARK_NOEXCEPT_OP(x)
231#endif
232#define __func__ __FUNCTION__
233#else
234#define BENCHMARK_ALWAYS_INLINE
235#define BENCHMARK_NOEXCEPT
236#define BENCHMARK_NOEXCEPT_OP(x)
237#endif
238
239#define BENCHMARK_INTERNAL_TOSTRING2(x) #x
240#define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x)
241
242#if defined(__GNUC__) || defined(__clang__)
243#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
244#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
245#define BENCHMARK_DISABLE_DEPRECATED_WARNING \
246 _Pragma("GCC diagnostic push") \
247 _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
248#define BENCHMARK_RESTORE_DEPRECATED_WARNING \
249 _Pragma("GCC diagnostic pop")
250#else
251#define BENCHMARK_BUILTIN_EXPECT(x, y) x
252#define BENCHMARK_DEPRECATED_MSG(msg)
253#define BENCHMARK_WARNING_MSG(msg) \
254 __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING( \
255 __LINE__) ") : warning note: " msg))
256#define BENCHMARK_DISABLE_DEPRECATED_WARNING
257#define BENCHMARK_RESTORE_DEPRECATED_WARNING
258#endif
259
260#if defined(__GNUC__) && !defined(__clang__)
261#define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
262#endif
263
264#ifndef __has_builtin
265#define __has_builtin(x) 0
266#endif
267
268#if defined(__GNUC__) || __has_builtin(__builtin_unreachable)
269#define BENCHMARK_UNREACHABLE() __builtin_unreachable()
270#elif defined(_MSC_VER)
271#define BENCHMARK_UNREACHABLE() __assume(false)
272#else
273#define BENCHMARK_UNREACHABLE() ((void)0)
274#endif
275
276#ifdef BENCHMARK_HAS_CXX11
277#define BENCHMARK_OVERRIDE override
278#else
279#define BENCHMARK_OVERRIDE
280#endif
281
282namespace benchmark {
283class BenchmarkReporter;
284
285void Initialize(int* argc, char** argv);
286void Shutdown();
287
288// Report to stdout all arguments in 'argv' as unrecognized except the first.
// Returns true if there is at least one unrecognized argument (i.e. 'argc' > 1).
290bool ReportUnrecognizedArguments(int argc, char** argv);
291
292// Returns the current value of --benchmark_filter.
293std::string GetBenchmarkFilter();
294
295// Generate a list of benchmarks matching the specified --benchmark_filter flag
296// and if --benchmark_list_tests is specified return after printing the name
297// of each matching benchmark. Otherwise run each matching benchmark and
298// report the results.
299//
300// spec : Specify the benchmarks to run. If users do not specify this arg,
301// then the value of FLAGS_benchmark_filter
302// will be used.
303//
304// The second and third overload use the specified 'display_reporter' and
305// 'file_reporter' respectively. 'file_reporter' will write to the file
306// specified
307// by '--benchmark_output'. If '--benchmark_output' is not given the
308// 'file_reporter' is ignored.
309//
310// RETURNS: The number of matching benchmarks.
311size_t RunSpecifiedBenchmarks();
312size_t RunSpecifiedBenchmarks(std::string spec);
313
314size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter);
315size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
316 std::string spec);
317
318size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
319 BenchmarkReporter* file_reporter);
320size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
321 BenchmarkReporter* file_reporter,
322 std::string spec);
323
324// If a MemoryManager is registered (via RegisterMemoryManager()),
325// it can be used to collect and report allocation metrics for a run of the
326// benchmark.
327class MemoryManager {
328 public:
329 static const int64_t TombstoneValue;
330
331 struct Result {
332 Result()
333 : num_allocs(0),
334 max_bytes_used(0),
335 total_allocated_bytes(TombstoneValue),
336 net_heap_growth(TombstoneValue) {}
337
338 // The number of allocations made in total between Start and Stop.
339 int64_t num_allocs;
340
341 // The peak memory use between Start and Stop.
342 int64_t max_bytes_used;
343
344 // The total memory allocated, in bytes, between Start and Stop.
345 // Init'ed to TombstoneValue if metric not available.
346 int64_t total_allocated_bytes;
347
348 // The net changes in memory, in bytes, between Start and Stop.
349 // ie., total_allocated_bytes - total_deallocated_bytes.
350 // Init'ed to TombstoneValue if metric not available.
351 int64_t net_heap_growth;
352 };
353
354 virtual ~MemoryManager() {}
355
356 // Implement this to start recording allocation information.
357 virtual void Start() = 0;
358
359 // Implement this to stop recording and fill out the given Result structure.
360 BENCHMARK_DEPRECATED_MSG("Use Stop(Result&) instead")
361 virtual void Stop(Result* result) = 0;
362
363 // FIXME(vyng): Make this pure virtual once we've migrated current users.
364 BENCHMARK_DISABLE_DEPRECATED_WARNING
365 virtual void Stop(Result& result) { Stop(result: &result); }
366 BENCHMARK_RESTORE_DEPRECATED_WARNING
367};
368
369// Register a MemoryManager instance that will be used to collect and report
370// allocation measurements for benchmark runs.
371void RegisterMemoryManager(MemoryManager* memory_manager);
372
373// Add a key-value pair to output as part of the context stanza in the report.
374void AddCustomContext(const std::string& key, const std::string& value);
375
376namespace internal {
377class Benchmark;
378class BenchmarkImp;
379class BenchmarkFamilies;
380
381void UseCharPointer(char const volatile*);
382
383// Take ownership of the pointer and register the benchmark. Return the
384// registered benchmark.
385Benchmark* RegisterBenchmarkInternal(Benchmark*);
386
387// Ensure that the standard streams are properly initialized in every TU.
388int InitializeStreams();
389BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();
390
391} // namespace internal
392
393#if (!defined(__GNUC__) && !defined(__clang__)) || defined(__pnacl__) || \
394 defined(__EMSCRIPTEN__)
395#define BENCHMARK_HAS_NO_INLINE_ASSEMBLY
396#endif
397
398// Force the compiler to flush pending writes to global memory. Acts as an
399// effective read/write barrier
400#ifdef BENCHMARK_HAS_CXX11
401inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
402 std::atomic_signal_fence(m: std::memory_order_acq_rel);
403}
404#endif
405
406// The DoNotOptimize(...) function can be used to prevent a value or
407// expression from being optimized away by the compiler. This function is
408// intended to add little to no overhead.
409// See: https://youtu.be/nXaxk27zwlk?t=2441
#ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
// GNU/Clang path: an empty asm statement whose input constraint ("r,m")
// forces 'value' to be materialized in a register or memory, so the
// compiler cannot eliminate the computation that produced it. The "memory"
// clobber additionally acts as a compiler barrier.
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  asm volatile("" : : "r,m"(value) : "memory");
}

// Lvalue overload: the "+" (read-write) constraint also tells the compiler
// the asm may have modified 'value', so later reads cannot be folded away.
// Constraint order differs per compiler ("+r,m" vs "+m,r") — presumably to
// pick the cheaper alternative first on each; behavior is equivalent.
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
#if defined(__clang__)
  asm volatile("" : "+r,m"(value) : : "memory");
#else
  asm volatile("" : "+m,r"(value) : : "memory");
#endif
}

// Pre-C++11 fallback for ClobberMemory (the C++11 version, defined above,
// uses std::atomic_signal_fence): empty asm with a "memory" clobber.
#ifndef BENCHMARK_HAS_CXX11
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
  asm volatile("" : : : "memory");
}
#endif
#elif defined(_MSC_VER)
// MSVC path: no GNU inline asm; escape the value's address through
// internal::UseCharPointer (opaque to the optimizer) and stop reordering
// with the _ReadWriteBarrier compiler intrinsic.
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
  _ReadWriteBarrier();
}

#ifndef BENCHMARK_HAS_CXX11
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { _ReadWriteBarrier(); }
#endif
#else
// Generic fallback: only the opaque call is available; no barrier exists,
// so this is weaker than the paths above.
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
}
// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers, before C++11.
#endif
447
448// This class is used for user-defined counters.
// This class is used for user-defined counters.
// A Counter wraps a double value plus flags describing how the value should
// be post-processed (rate, averaged over threads/iterations, inverted) and
// which 1k convention (1000 vs 1024) to use when formatting it.
class Counter {
 public:
  enum Flags {
    kDefaults = 0,
    // Mark the counter as a rate. It will be presented divided
    // by the duration of the benchmark.
    kIsRate = 1U << 0U,
    // Mark the counter as a thread-average quantity. It will be
    // presented divided by the number of threads.
    kAvgThreads = 1U << 1U,
    // Mark the counter as a thread-average rate. See above.
    kAvgThreadsRate = kIsRate | kAvgThreads,
    // Mark the counter as a constant value, valid/same for *every* iteration.
    // When reporting, it will be *multiplied* by the iteration count.
    kIsIterationInvariant = 1U << 2U,
    // Mark the counter as a constant rate.
    // When reporting, it will be *multiplied* by the iteration count
    // and then divided by the duration of the benchmark.
    kIsIterationInvariantRate = kIsRate | kIsIterationInvariant,
    // Mark the counter as an iteration-average quantity.
    // It will be presented divided by the number of iterations.
    kAvgIterations = 1U << 3U,
    // Mark the counter as an iteration-average rate. See above.
    kAvgIterationsRate = kIsRate | kAvgIterations,

    // In the end, invert the result. This is always done last!
    kInvert = 1U << 31U
  };

  enum OneK {
    // 1'000 items per 1k
    kIs1000 = 1000,
    // 1'024 items per 1k
    kIs1024 = 1024
  };

  double value;  // the raw counter value, before any flag post-processing
  Flags flags;   // how to post-process 'value' when reporting
  OneK oneK;     // multiplier convention used when formatting (k/M/G...)

  BENCHMARK_ALWAYS_INLINE
  Counter(double v = 0., Flags f = kDefaults, OneK k = kIs1000)
      : value(v), flags(f), oneK(k) {}

  // Implicit conversions so a Counter can be read/written like a double.
  BENCHMARK_ALWAYS_INLINE operator double const&() const { return value; }
  BENCHMARK_ALWAYS_INLINE operator double&() { return value; }
};
496
497// A helper for user code to create unforeseen combinations of Flags, without
498// having to do this cast manually each time, or providing this operator.
499Counter::Flags inline operator|(const Counter::Flags& LHS,
500 const Counter::Flags& RHS) {
501 return static_cast<Counter::Flags>(static_cast<int>(LHS) |
502 static_cast<int>(RHS));
503}
504
505// This is the container for the user-defined counters.
506typedef std::map<std::string, Counter> UserCounters;
507
508// TimeUnit is passed to a benchmark in order to specify the order of magnitude
509// for the measured time.
510enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond };
511
512// BigO is passed to a benchmark in order to specify the asymptotic
513// computational
514// complexity for the benchmark. In case oAuto is selected, complexity will be
515// calculated automatically to the best fit.
516enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda };
517
518typedef uint64_t IterationCount;
519
520enum StatisticUnit { kTime, kPercentage };
521
522// BigOFunc is passed to a benchmark in order to specify the asymptotic
523// computational complexity for the benchmark.
524typedef double(BigOFunc)(IterationCount);
525
526// StatisticsFunc is passed to a benchmark in order to compute some descriptive
527// statistics over all the measurements of some type
528typedef double(StatisticsFunc)(const std::vector<double>&);
529
530namespace internal {
// Descriptive statistic to compute across repeated benchmark runs: a display
// name, the function that computes it, and the unit it is reported in.
struct Statistics {
  // Name under which the statistic is reported.
  std::string name_;
  // Function applied to the collected measurements to produce the statistic.
  StatisticsFunc* compute_;
  // Whether the result is a time quantity or a percentage (defaults to kTime).
  StatisticUnit unit_;

  Statistics(const std::string& name, StatisticsFunc* compute,
             StatisticUnit unit = kTime)
      : name_(name), compute_(compute), unit_(unit) {}
};
540
541class BenchmarkInstance;
542class ThreadTimer;
543class ThreadManager;
544class PerfCountersMeasurement;
545
// Bit-flags controlling whether reporters emit per-repetition results,
// aggregates only, or both. The underlying type is fixed to 'unsigned' only
// when C++11 enum-base syntax is available.
enum AggregationReportMode
#if defined(BENCHMARK_HAS_CXX11)
    : unsigned
#else
#endif
{
  // The mode has not been manually specified
  ARM_Unspecified = 0,
  // The mode is user-specified.
  // This may or may not be set when the following bit-flags are set.
  ARM_Default = 1U << 0U,
  // File reporter should only output aggregates.
  ARM_FileReportAggregatesOnly = 1U << 1U,
  // Display reporter should only output aggregates
  ARM_DisplayReportAggregatesOnly = 1U << 2U,
  // Both reporters should only display aggregates.
  ARM_ReportAggregatesOnly =
      ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly
};
565
566} // namespace internal
567
568// State is passed to a running Benchmark and contains state for the
569// benchmark to use.
570class State {
571 public:
572 struct StateIterator;
573 friend struct StateIterator;
574
575 // Returns iterators used to run each iteration of a benchmark using a
576 // C++11 ranged-based for loop. These functions should not be called directly.
577 //
578 // REQUIRES: The benchmark has not started running yet. Neither begin nor end
579 // have been called previously.
580 //
581 // NOTE: KeepRunning may not be used after calling either of these functions.
582 BENCHMARK_ALWAYS_INLINE StateIterator begin();
583 BENCHMARK_ALWAYS_INLINE StateIterator end();
584
585 // Returns true if the benchmark should continue through another iteration.
586 // NOTE: A benchmark may not return from the test until KeepRunning() has
587 // returned false.
588 bool KeepRunning();
589
590 // Returns true iff the benchmark should run n more iterations.
591 // REQUIRES: 'n' > 0.
592 // NOTE: A benchmark must not return from the test until KeepRunningBatch()
593 // has returned false.
594 // NOTE: KeepRunningBatch() may overshoot by up to 'n' iterations.
595 //
596 // Intended usage:
597 // while (state.KeepRunningBatch(1000)) {
598 // // process 1000 elements
599 // }
600 bool KeepRunningBatch(IterationCount n);
601
602 // REQUIRES: timer is running and 'SkipWithError(...)' has not been called
603 // by the current thread.
604 // Stop the benchmark timer. If not called, the timer will be
605 // automatically stopped after the last iteration of the benchmark loop.
606 //
607 // For threaded benchmarks the PauseTiming() function only pauses the timing
608 // for the current thread.
609 //
610 // NOTE: The "real time" measurement is per-thread. If different threads
611 // report different measurements the largest one is reported.
612 //
613 // NOTE: PauseTiming()/ResumeTiming() are relatively
614 // heavyweight, and so their use should generally be avoided
615 // within each benchmark iteration, if possible.
616 void PauseTiming();
617
618 // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called
619 // by the current thread.
620 // Start the benchmark timer. The timer is NOT running on entrance to the
621 // benchmark function. It begins running after control flow enters the
622 // benchmark loop.
623 //
624 // NOTE: PauseTiming()/ResumeTiming() are relatively
625 // heavyweight, and so their use should generally be avoided
626 // within each benchmark iteration, if possible.
627 void ResumeTiming();
628
629 // REQUIRES: 'SkipWithError(...)' has not been called previously by the
630 // current thread.
631 // Report the benchmark as resulting in an error with the specified 'msg'.
632 // After this call the user may explicitly 'return' from the benchmark.
633 //
634 // If the ranged-for style of benchmark loop is used, the user must explicitly
635 // break from the loop, otherwise all future iterations will be run.
636 // If the 'KeepRunning()' loop is used the current thread will automatically
637 // exit the loop at the end of the current iteration.
638 //
639 // For threaded benchmarks only the current thread stops executing and future
640 // calls to `KeepRunning()` will block until all threads have completed
641 // the `KeepRunning()` loop. If multiple threads report an error only the
642 // first error message is used.
643 //
644 // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit
645 // the current scope immediately. If the function is called from within
646 // the 'KeepRunning()' loop the current iteration will finish. It is the users
647 // responsibility to exit the scope as needed.
648 void SkipWithError(const char* msg);
649
650 // Returns true if an error has been reported with 'SkipWithError(...)'.
651 bool error_occurred() const { return error_occurred_; }
652
653 // REQUIRES: called exactly once per iteration of the benchmarking loop.
654 // Set the manually measured time for this benchmark iteration, which
655 // is used instead of automatically measured time if UseManualTime() was
656 // specified.
657 //
658 // For threaded benchmarks the final value will be set to the largest
659 // reported values.
660 void SetIterationTime(double seconds);
661
662 // Set the number of bytes processed by the current benchmark
663 // execution. This routine is typically called once at the end of a
664 // throughput oriented benchmark.
665 //
666 // REQUIRES: a benchmark has exited its benchmarking loop.
667 BENCHMARK_ALWAYS_INLINE
668 void SetBytesProcessed(int64_t bytes) {
669 counters["bytes_per_second"] =
670 Counter(static_cast<double>(bytes), Counter::kIsRate, Counter::kIs1024);
671 }
672
673 BENCHMARK_ALWAYS_INLINE
674 int64_t bytes_processed() const {
675 if (counters.find(k: "bytes_per_second") != counters.end())
676 return static_cast<int64_t>(counters.at(k: "bytes_per_second"));
677 return 0;
678 }
679
680 // If this routine is called with complexity_n > 0 and complexity report is
681 // requested for the
682 // family benchmark, then current benchmark will be part of the computation
683 // and complexity_n will
684 // represent the length of N.
685 BENCHMARK_ALWAYS_INLINE
686 void SetComplexityN(int64_t complexity_n) { complexity_n_ = complexity_n; }
687
688 BENCHMARK_ALWAYS_INLINE
689 int64_t complexity_length_n() const { return complexity_n_; }
690
691 // If this routine is called with items > 0, then an items/s
692 // label is printed on the benchmark report line for the currently
693 // executing benchmark. It is typically called at the end of a processing
694 // benchmark where a processing items/second output is desired.
695 //
696 // REQUIRES: a benchmark has exited its benchmarking loop.
697 BENCHMARK_ALWAYS_INLINE
698 void SetItemsProcessed(int64_t items) {
699 counters["items_per_second"] =
700 Counter(static_cast<double>(items), benchmark::Counter::kIsRate);
701 }
702
703 BENCHMARK_ALWAYS_INLINE
704 int64_t items_processed() const {
705 if (counters.find(k: "items_per_second") != counters.end())
706 return static_cast<int64_t>(counters.at(k: "items_per_second"));
707 return 0;
708 }
709
710 // If this routine is called, the specified label is printed at the
711 // end of the benchmark report line for the currently executing
712 // benchmark. Example:
713 // static void BM_Compress(benchmark::State& state) {
714 // ...
715 // double compress = input_size / output_size;
716 // state.SetLabel(StrFormat("compress:%.1f%%", 100.0*compression));
717 // }
718 // Produces output that looks like:
719 // BM_Compress 50 50 14115038 compress:27.3%
720 //
721 // REQUIRES: a benchmark has exited its benchmarking loop.
722 void SetLabel(const char* label);
723
724 void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) {
725 this->SetLabel(str.c_str());
726 }
727
728 // Range arguments for this run. CHECKs if the argument has been set.
729 BENCHMARK_ALWAYS_INLINE
730 int64_t range(std::size_t pos = 0) const {
731 assert(range_.size() > pos);
732 return range_[pos];
733 }
734
735 BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead")
736 int64_t range_x() const { return range(pos: 0); }
737
738 BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead")
739 int64_t range_y() const { return range(pos: 1); }
740
741 // Number of threads concurrently executing the benchmark.
742 BENCHMARK_ALWAYS_INLINE
743 int threads() const { return threads_; }
744
745 // Index of the executing thread. Values from [0, threads).
746 BENCHMARK_ALWAYS_INLINE
747 int thread_index() const { return thread_index_; }
748
749 BENCHMARK_ALWAYS_INLINE
750 IterationCount iterations() const {
751 if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
752 return 0;
753 }
754 return max_iterations - total_iterations_ + batch_leftover_;
755 }
756
757 private:
758 // items we expect on the first cache line (ie 64 bytes of the struct)
759 // When total_iterations_ is 0, KeepRunning() and friends will return false.
760 // May be larger than max_iterations.
761 IterationCount total_iterations_;
762
763 // When using KeepRunningBatch(), batch_leftover_ holds the number of
764 // iterations beyond max_iters that were run. Used to track
765 // completed_iterations_ accurately.
766 IterationCount batch_leftover_;
767
768 public:
769 const IterationCount max_iterations;
770
771 private:
772 bool started_;
773 bool finished_;
774 bool error_occurred_;
775
776 private: // items we don't need on the first cache line
777 std::vector<int64_t> range_;
778
779 int64_t complexity_n_;
780
781 public:
782 // Container for user-defined counters.
783 UserCounters counters;
784
785 private:
786 State(IterationCount max_iters, const std::vector<int64_t>& ranges,
787 int thread_i, int n_threads, internal::ThreadTimer* timer,
788 internal::ThreadManager* manager,
789 internal::PerfCountersMeasurement* perf_counters_measurement);
790
791 void StartKeepRunning();
792 // Implementation of KeepRunning() and KeepRunningBatch().
793 // is_batch must be true unless n is 1.
794 bool KeepRunningInternal(IterationCount n, bool is_batch);
795 void FinishKeepRunning();
796
797 const int thread_index_;
798 const int threads_;
799
800 internal::ThreadTimer* const timer_;
801 internal::ThreadManager* const manager_;
802 internal::PerfCountersMeasurement* const perf_counters_measurement_;
803
804 friend class internal::BenchmarkInstance;
805};
806
807inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() {
808 return KeepRunningInternal(n: 1, /*is_batch=*/is_batch: false);
809}
810
811inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningBatch(IterationCount n) {
812 return KeepRunningInternal(n, /*is_batch=*/is_batch: true);
813}
814
// Shared implementation of KeepRunning() and KeepRunningBatch(): decrements
// the remaining-iteration budget by 'n' and reports whether the benchmark
// body should run again.
inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningInternal(IterationCount n,
                                                               bool is_batch) {
  // total_iterations_ is set to 0 by the constructor, and always set to a
  // nonzero value by StartKeepRunning().
  assert(n > 0);
  // n must be 1 unless is_batch is true.
  assert(is_batch || n == 1);
  // Fast path: enough budget remains; hinted likely-taken for the hot loop.
  if (BENCHMARK_BUILTIN_EXPECT(total_iterations_ >= n, true)) {
    total_iterations_ -= n;
    return true;
  }
  // First call: start the run, then retry the budget check (unless an error
  // was reported during startup).
  if (!started_) {
    StartKeepRunning();
    if (!error_occurred_ && total_iterations_ >= n) {
      total_iterations_ -= n;
      return true;
    }
  }
  // For non-batch runs, total_iterations_ must be 0 by now.
  // A batch may overshoot the budget; record the overshoot in batch_leftover_
  // so iterations() stays accurate, and allow this final batch to run.
  if (is_batch && total_iterations_ != 0) {
    batch_leftover_ = n - total_iterations_;
    total_iterations_ = 0;
    return true;
  }
  // Budget exhausted (or error): finish the run and stop the loop.
  FinishKeepRunning();
  return false;
}
842
// Forward iterator driving the ranged-for benchmark loop. It counts the
// remaining iterations down in 'cached_' and signals loop exit from
// operator!=; dereferencing yields an empty Value tag.
struct State::StateIterator {
  struct BENCHMARK_UNUSED Value {};
  typedef std::forward_iterator_tag iterator_category;
  typedef Value value_type;
  typedef Value reference;
  typedef Value pointer;
  typedef std::ptrdiff_t difference_type;

 private:
  friend class State;
  // End-sentinel constructor: zero remaining iterations, no parent State.
  BENCHMARK_ALWAYS_INLINE
  StateIterator() : cached_(0), parent_() {}

  // Begin-iterator constructor: seeds the countdown with max_iterations,
  // or 0 if an error was already reported (so the loop body never runs).
  BENCHMARK_ALWAYS_INLINE
  explicit StateIterator(State* st)
      : cached_(st->error_occurred_ ? 0 : st->max_iterations), parent_(st) {}

 public:
  // Dereferencing yields a dummy value; the loop variable is unused.
  BENCHMARK_ALWAYS_INLINE
  Value operator*() const { return Value(); }

  // One loop iteration completed: decrement the remaining-iteration count.
  BENCHMARK_ALWAYS_INLINE
  StateIterator& operator++() {
    assert(cached_ > 0);
    --cached_;
    return *this;
  }

  // Loop-termination test. NOTE: ignores its argument; the loop continues
  // while iterations remain, and FinishKeepRunning() is invoked exactly once
  // when the countdown reaches zero. Hinted likely-true for the hot loop.
  BENCHMARK_ALWAYS_INLINE
  bool operator!=(StateIterator const&) const {
    if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) return true;
    parent_->FinishKeepRunning();
    return false;
  }

 private:
  IterationCount cached_;  // iterations remaining before the loop ends
  State* const parent_;    // owning State; null only in the end sentinel
};
882
// Begin iterator for the ranged-for loop; carries a pointer back to this
// State so the iterator can call FinishKeepRunning() on exit.
inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::begin() {
  return StateIterator(this);
}
// End sentinel. NOTE: StartKeepRunning() is invoked here; in a ranged-for
// loop end() is evaluated once before the first iteration, so this starts
// the run exactly once.
inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::end() {
  StartKeepRunning();
  return StateIterator();
}
890
891namespace internal {
892
893typedef void(Function)(State&);
894
895// ------------------------------------------------------
896// Benchmark registration object. The BENCHMARK() macro expands
897// into an internal::Benchmark* object. Various methods can
898// be called on this object to change the properties of the benchmark.
// Each method returns "this" so that multiple method calls can
// be chained into one expression.
901class Benchmark {
902 public:
903 virtual ~Benchmark();
904
905 // Note: the following methods all return "this" so that multiple
906 // method calls can be chained together in one expression.
907
908 // Specify the name of the benchmark
909 Benchmark* Name(const std::string& name);
910
911 // Run this benchmark once with "x" as the extra argument passed
912 // to the function.
913 // REQUIRES: The function passed to the constructor must accept an arg1.
914 Benchmark* Arg(int64_t x);
915
916 // Run this benchmark with the given time unit for the generated output report
917 Benchmark* Unit(TimeUnit unit);
918
919 // Run this benchmark once for a number of values picked from the
920 // range [start..limit]. (start and limit are always picked.)
921 // REQUIRES: The function passed to the constructor must accept an arg1.
922 Benchmark* Range(int64_t start, int64_t limit);
923
924 // Run this benchmark once for all values in the range [start..limit] with
925 // specific step
926 // REQUIRES: The function passed to the constructor must accept an arg1.
927 Benchmark* DenseRange(int64_t start, int64_t limit, int step = 1);
928
929 // Run this benchmark once with "args" as the extra arguments passed
930 // to the function.
931 // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
932 Benchmark* Args(const std::vector<int64_t>& args);
933
934 // Equivalent to Args({x, y})
935 // NOTE: This is a legacy C++03 interface provided for compatibility only.
936 // New code should use 'Args'.
937 Benchmark* ArgPair(int64_t x, int64_t y) {
938 std::vector<int64_t> args;
939 args.push_back(x: x);
940 args.push_back(x: y);
941 return Args(args);
942 }
943
944 // Run this benchmark once for a number of values picked from the
945 // ranges [start..limit]. (starts and limits are always picked.)
946 // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
947 Benchmark* Ranges(const std::vector<std::pair<int64_t, int64_t> >& ranges);
948
949 // Run this benchmark once for each combination of values in the (cartesian)
950 // product of the supplied argument lists.
951 // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
952 Benchmark* ArgsProduct(const std::vector<std::vector<int64_t> >& arglists);
953
954 // Equivalent to ArgNames({name})
955 Benchmark* ArgName(const std::string& name);
956
957 // Set the argument names to display in the benchmark name. If not called,
958 // only argument values will be shown.
959 Benchmark* ArgNames(const std::vector<std::string>& names);
960
961 // Equivalent to Ranges({{lo1, hi1}, {lo2, hi2}}).
962 // NOTE: This is a legacy C++03 interface provided for compatibility only.
963 // New code should use 'Ranges'.
964 Benchmark* RangePair(int64_t lo1, int64_t hi1, int64_t lo2, int64_t hi2) {
965 std::vector<std::pair<int64_t, int64_t> > ranges;
966 ranges.push_back(x: std::make_pair(t1&: lo1, t2&: hi1));
967 ranges.push_back(x: std::make_pair(t1&: lo2, t2&: hi2));
968 return Ranges(ranges);
969 }
970
971 // Pass this benchmark object to *func, which can customize
972 // the benchmark by calling various methods like Arg, Args,
973 // Threads, etc.
974 Benchmark* Apply(void (*func)(Benchmark* benchmark));
975
976 // Set the range multiplier for non-dense range. If not called, the range
977 // multiplier kRangeMultiplier will be used.
978 Benchmark* RangeMultiplier(int multiplier);
979
980 // Set the minimum amount of time to use when running this benchmark. This
981 // option overrides the `benchmark_min_time` flag.
982 // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
983 Benchmark* MinTime(double t);
984
985 // Specify the amount of iterations that should be run by this benchmark.
986 // REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark.
987 //
988 // NOTE: This function should only be used when *exact* iteration control is
989 // needed and never to control or limit how long a benchmark runs, where
990 // `--benchmark_min_time=N` or `MinTime(...)` should be used instead.
991 Benchmark* Iterations(IterationCount n);
992
993 // Specify the amount of times to repeat this benchmark. This option overrides
994 // the `benchmark_repetitions` flag.
995 // REQUIRES: `n > 0`
996 Benchmark* Repetitions(int n);
997
998 // Specify if each repetition of the benchmark should be reported separately
999 // or if only the final statistics should be reported. If the benchmark
1000 // is not repeated then the single result is always reported.
1001 // Applies to *ALL* reporters (display and file).
1002 Benchmark* ReportAggregatesOnly(bool value = true);
1003
1004 // Same as ReportAggregatesOnly(), but applies to display reporter only.
1005 Benchmark* DisplayAggregatesOnly(bool value = true);
1006
1007 // By default, the CPU time is measured only for the main thread, which may
1008 // be unrepresentative if the benchmark uses threads internally. If called,
1009 // the total CPU time spent by all the threads will be measured instead.
1010 // By default, the only the main thread CPU time will be measured.
1011 Benchmark* MeasureProcessCPUTime();
1012
1013 // If a particular benchmark should use the Wall clock instead of the CPU time
1014 // (be it either the CPU time of the main thread only (default), or the
1015 // total CPU usage of the benchmark), call this method. If called, the elapsed
1016 // (wall) time will be used to control how many iterations are run, and in the
1017 // printing of items/second or MB/seconds values.
1018 // If not called, the CPU time used by the benchmark will be used.
1019 Benchmark* UseRealTime();
1020
1021 // If a benchmark must measure time manually (e.g. if GPU execution time is
1022 // being
1023 // measured), call this method. If called, each benchmark iteration should
1024 // call
1025 // SetIterationTime(seconds) to report the measured time, which will be used
1026 // to control how many iterations are run, and in the printing of items/second
1027 // or MB/second values.
1028 Benchmark* UseManualTime();
1029
1030 // Set the asymptotic computational complexity for the benchmark. If called
1031 // the asymptotic computational complexity will be shown on the output.
1032 Benchmark* Complexity(BigO complexity = benchmark::oAuto);
1033
1034 // Set the asymptotic computational complexity for the benchmark. If called
1035 // the asymptotic computational complexity will be shown on the output.
1036 Benchmark* Complexity(BigOFunc* complexity);
1037
1038 // Add this statistics to be computed over all the values of benchmark run
1039 Benchmark* ComputeStatistics(std::string name, StatisticsFunc* statistics,
1040 StatisticUnit unit = kTime);
1041
1042 // Support for running multiple copies of the same benchmark concurrently
1043 // in multiple threads. This may be useful when measuring the scaling
1044 // of some piece of code.
1045
1046 // Run one instance of this benchmark concurrently in t threads.
1047 Benchmark* Threads(int t);
1048
1049 // Pick a set of values T from [min_threads,max_threads].
1050 // min_threads and max_threads are always included in T. Run this
1051 // benchmark once for each value in T. The benchmark run for a
1052 // particular value t consists of t threads running the benchmark
1053 // function concurrently. For example, consider:
1054 // BENCHMARK(Foo)->ThreadRange(1,16);
1055 // This will run the following benchmarks:
1056 // Foo in 1 thread
1057 // Foo in 2 threads
1058 // Foo in 4 threads
1059 // Foo in 8 threads
1060 // Foo in 16 threads
1061 Benchmark* ThreadRange(int min_threads, int max_threads);
1062
1063 // For each value n in the range, run this benchmark once using n threads.
1064 // min_threads and max_threads are always included in the range.
1065 // stride specifies the increment. E.g. DenseThreadRange(1, 8, 3) starts
1066 // a benchmark with 1, 4, 7 and 8 threads.
1067 Benchmark* DenseThreadRange(int min_threads, int max_threads, int stride = 1);
1068
1069 // Equivalent to ThreadRange(NumCPUs(), NumCPUs())
1070 Benchmark* ThreadPerCpu();
1071
1072 virtual void Run(State& state) = 0;
1073
1074 protected:
1075 explicit Benchmark(const char* name);
1076 Benchmark(Benchmark const&);
1077 void SetName(const char* name);
1078
1079 int ArgsCnt() const;
1080
1081 private:
1082 friend class BenchmarkFamilies;
1083 friend class BenchmarkInstance;
1084
1085 std::string name_;
1086 AggregationReportMode aggregation_report_mode_;
1087 std::vector<std::string> arg_names_; // Args for all benchmark runs
1088 std::vector<std::vector<int64_t> > args_; // Args for all benchmark runs
1089 TimeUnit time_unit_;
1090 int range_multiplier_;
1091 double min_time_;
1092 IterationCount iterations_;
1093 int repetitions_;
1094 bool measure_process_cpu_time_;
1095 bool use_real_time_;
1096 bool use_manual_time_;
1097 BigO complexity_;
1098 BigOFunc* complexity_lambda_;
1099 std::vector<Statistics> statistics_;
1100 std::vector<int> thread_counts_;
1101
1102 Benchmark& operator=(Benchmark const&);
1103};
1104
1105} // namespace internal
1106
1107// Create and register a benchmark with the specified 'name' that invokes
1108// the specified functor 'fn'.
1109//
1110// RETURNS: A pointer to the registered benchmark.
1111internal::Benchmark* RegisterBenchmark(const char* name,
1112 internal::Function* fn);
1113
1114#if defined(BENCHMARK_HAS_CXX11)
1115template <class Lambda>
1116internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn);
1117#endif
1118
1119// Remove all registered benchmarks. All pointers to previously registered
1120// benchmarks are invalidated.
1121void ClearRegisteredBenchmarks();
1122
1123namespace internal {
// The class used to hold all Benchmarks created from static functions
// (i.e. those created using the BENCHMARK(...) macros).
// Benchmark subclass that adapts a plain `void(State&)` function, as
// registered by the BENCHMARK(...) macros.
class FunctionBenchmark : public Benchmark {
 public:
  FunctionBenchmark(const char* name, Function* func)
      : Benchmark(name), func_(func) {}

  virtual void Run(State& st) BENCHMARK_OVERRIDE;

 private:
  Function* func_;  // The registered benchmark function.
};
1136
1137#ifdef BENCHMARK_HAS_CXX11
// Benchmark subclass that owns a copy of an arbitrary callable (lambda,
// functor) and invokes it on each Run(). Construction is private; instances
// are created only by the RegisterBenchmark() friend below.
template <class Lambda>
class LambdaBenchmark : public Benchmark {
 public:
  virtual void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); }

 private:
  // Perfect-forwards the callable into the stored copy.
  template <class OLambda>
  LambdaBenchmark(const char* name, OLambda&& lam)
      : Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}

  LambdaBenchmark(LambdaBenchmark const&) = delete;

 private:
  template <class Lam>
  friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&);

  Lambda lambda_;  // The callable run as the benchmark body.
};
1156#endif
1157
1158} // namespace internal
1159
// Registers a benchmark backed by a plain function pointer.
// Ownership of the new FunctionBenchmark passes to the benchmark registry;
// the returned pointer remains valid for further configuration chaining.
inline internal::Benchmark* RegisterBenchmark(const char* name,
                                              internal::Function* fn) {
  return internal::RegisterBenchmarkInternal(
      ::new internal::FunctionBenchmark(name, fn));
}
1165
1166#ifdef BENCHMARK_HAS_CXX11
// Registers a benchmark backed by an arbitrary callable. The callable is
// decayed and copied/moved into a LambdaBenchmark owned by the registry.
template <class Lambda>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) {
  using BenchType =
      internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
  return internal::RegisterBenchmarkInternal(
      ::new BenchType(name, std::forward<Lambda>(fn)));
}
1174#endif
1175
1176#if defined(BENCHMARK_HAS_CXX11) && \
1177 (!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409)
// Registers a benchmark whose callable takes extra arguments beyond State&.
// NOTE: `fn` and all extra arguments are captured *by copy* into the
// wrapping lambda, so they must be copyable and are not forwarded.
template <class Lambda, class... Args>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn,
                                       Args&&... args) {
  return benchmark::RegisterBenchmark(
      name, [=](benchmark::State& st) { fn(st, args...); });
}
1184#else
1185#define BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
1186#endif
1187
1188// The base class for all fixture tests.
1189class Fixture : public internal::Benchmark {
1190 public:
1191 Fixture() : internal::Benchmark("") {}
1192
1193 virtual void Run(State& st) BENCHMARK_OVERRIDE {
1194 this->SetUp(st);
1195 this->BenchmarkCase(st);
1196 this->TearDown(st);
1197 }
1198
1199 // These will be deprecated ...
1200 virtual void SetUp(const State&) {}
1201 virtual void TearDown(const State&) {}
1202 // ... In favor of these.
1203 virtual void SetUp(State& st) { SetUp(const_cast<const State&>(st)); }
1204 virtual void TearDown(State& st) { TearDown(const_cast<const State&>(st)); }
1205
1206 protected:
1207 virtual void BenchmarkCase(State&) = 0;
1208};
1209
1210} // namespace benchmark
1211
1212// ------------------------------------------------------
1213// Macro to register benchmarks
1214
// Check that __COUNTER__ is defined and that __COUNTER__ increases by 1
// every time it is expanded. X + 1 == X + 0 is used in case X is defined to be
// empty. If X is empty the expression becomes (+1 == +0).
#if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0)
#define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__
#else
#define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__
#endif

// Helpers for generating unique variable names
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_PRIVATE_NAME(...)                                      \
  BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, \
                           __VA_ARGS__)
#else
#define BENCHMARK_PRIVATE_NAME(n) \
  BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, n)
#endif  // BENCHMARK_HAS_CXX11

// Two-level concatenation so that macro arguments (e.g. __COUNTER__) are
// expanded before token pasting.
#define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c)
#define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c
// Helper for concatenation with macro name expansion
#define BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method) \
  BaseClass##_##Method##_Benchmark

// Declares an unused file-scope Benchmark* whose initializer performs the
// registration as a side effect at static-initialization time.
#define BENCHMARK_PRIVATE_DECLARE(n)                                 \
  static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \
      BENCHMARK_UNUSED

// BENCHMARK(func) registers `func` as a benchmark named after it.
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK(...)                                               \
  BENCHMARK_PRIVATE_DECLARE(_benchmark_) =                           \
      (::benchmark::internal::RegisterBenchmarkInternal(             \
          new ::benchmark::internal::FunctionBenchmark(#__VA_ARGS__, \
                                                       &__VA_ARGS__)))
#else
#define BENCHMARK(n)                                     \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(#n, n)))
#endif  // BENCHMARK_HAS_CXX11

// Old-style macros
#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->Args({(a1), (a2)})
#define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t))
#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
  BENCHMARK(n)->RangePair({{(l1), (h1)}, {(l2), (h2)}})
1264
1265#ifdef BENCHMARK_HAS_CXX11
1266
// Register a benchmark which invokes the function specified by `func`
// with the additional arguments specified by `...`.
//
// For example:
//
// template <class ...ExtraArgs>`
// void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
//  [...]
//}
// /* Registers a benchmark named "BM_takes_args/int_string_test` */
// BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
//
// NOTE: the extra arguments are pasted textually into the lambda body, so
// they are evaluated anew on every run, not captured at registration time.
#define BENCHMARK_CAPTURE(func, test_case_name, ...)     \
  BENCHMARK_PRIVATE_DECLARE(func) =                      \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(  \
              #func "/" #test_case_name,                 \
              [](::benchmark::State& st) { func(st, __VA_ARGS__); })))
1284
1285#endif // BENCHMARK_HAS_CXX11
1286
// This will register a benchmark for a templatized function. For example:
//
// template<int arg>
// void BM_Foo(int iters);
//
// BENCHMARK_TEMPLATE(BM_Foo, 1);
//
// will register BM_Foo<1> as a benchmark.
// One-argument form; the template argument becomes part of the name.
#define BENCHMARK_TEMPLATE1(n, a)                        \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(#n "<" #a ">", n<a>)))

// Two-argument form, e.g. BENCHMARK_TEMPLATE2(BM_Foo, int, 8).
#define BENCHMARK_TEMPLATE2(n, a, b)                                         \
  BENCHMARK_PRIVATE_DECLARE(n) =                                             \
      (::benchmark::internal::RegisterBenchmarkInternal(                     \
          new ::benchmark::internal::FunctionBenchmark(#n "<" #a "," #b ">", \
                                                       n<a, b>)))

// Variadic form (C++11+); falls back to the one-argument form otherwise.
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE(n, ...)                       \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(  \
              #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>)))
#else
#define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a)
#endif
1315
// Defines the fixture subclass <BaseClass>_<Method>_Benchmark, whose
// BenchmarkCase body is supplied later via the BENCHMARK_DEFINE_F /
// BENCHMARK_F macro family below.
#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method)          \
  class BaseClass##_##Method##_Benchmark : public BaseClass {   \
   public:                                                      \
    BaseClass##_##Method##_Benchmark() : BaseClass() {          \
      this->SetName(#BaseClass "/" #Method);                    \
    }                                                           \
                                                                \
   protected:                                                   \
    virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
  };

// As above, for a fixture template taking one argument.
#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a)     \
  class BaseClass##_##Method##_Benchmark : public BaseClass<a> {        \
   public:                                                              \
    BaseClass##_##Method##_Benchmark() : BaseClass<a>() {               \
      this->SetName(#BaseClass "<" #a ">/" #Method);                    \
    }                                                                   \
                                                                        \
   protected:                                                           \
    virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
  };

// As above, for a fixture template taking two arguments.
#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b)  \
  class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> {     \
   public:                                                              \
    BaseClass##_##Method##_Benchmark() : BaseClass<a, b>() {            \
      this->SetName(#BaseClass "<" #a "," #b ">/" #Method);             \
    }                                                                   \
                                                                        \
   protected:                                                           \
    virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
  };

// Variadic fixture-template form (C++11+).
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...)       \
  class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \
   public:                                                                 \
    BaseClass##_##Method##_Benchmark() : BaseClass<__VA_ARGS__>() {        \
      this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method);             \
    }                                                                      \
                                                                           \
   protected:                                                              \
    virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE;    \
  };
#else
#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(n, a) \
  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(n, a)
#endif
1364
// Declares the fixture benchmark class and opens the definition of its
// BenchmarkCase; the macro invocation must be followed by the method body:
//   BENCHMARK_DEFINE_F(MyFixture, MyCase)(benchmark::State& st) { ... }
#define BENCHMARK_DEFINE_F(BaseClass, Method)    \
  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#define BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a)    \
  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#define BENCHMARK_TEMPLATE2_DEFINE_F(BaseClass, Method, a, b)    \
  BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, ...)            \
  BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
#else
#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, a) \
  BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a)
#endif

// Registers a fixture benchmark previously defined with BENCHMARK_DEFINE_F.
#define BENCHMARK_REGISTER_F(BaseClass, Method) \
  BENCHMARK_PRIVATE_REGISTER_F(BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method))

#define BENCHMARK_PRIVATE_REGISTER_F(TestName) \
  BENCHMARK_PRIVATE_DECLARE(TestName) =        \
      (::benchmark::internal::RegisterBenchmarkInternal(new TestName()))

// This macro will define and register a benchmark within a fixture class.
#define BENCHMARK_F(BaseClass, Method)           \
  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
  BENCHMARK_REGISTER_F(BaseClass, Method);       \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#define BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)           \
  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                    \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#define BENCHMARK_TEMPLATE2_F(BaseClass, Method, a, b)           \
  BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                       \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE_F(BaseClass, Method, ...)                   \
  BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                             \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
#else
#define BENCHMARK_TEMPLATE_F(BaseClass, Method, a) \
  BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)
#endif
1418
// Helper macro to create a main routine in a test that runs the benchmarks.
// The trailing `int main(int, char**)` declaration lets (and requires) the
// macro invocation to end with a semicolon.
#define BENCHMARK_MAIN()                                                \
  int main(int argc, char** argv) {                                     \
    ::benchmark::Initialize(&argc, argv);                               \
    if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \
    ::benchmark::RunSpecifiedBenchmarks();                              \
    ::benchmark::Shutdown();                                            \
    return 0;                                                           \
  }                                                                     \
  int main(int, char**)
1429
1430// ------------------------------------------------------
1431// Benchmark Reporters
1432
1433namespace benchmark {
1434
// Static description of the host CPU, gathered once and cached.
struct CPUInfo {
  struct CacheInfo {
    std::string type;  // e.g. data/instruction/unified — platform reported.
    int level;
    int size;
    int num_sharing;   // Number of CPUs sharing this cache (platform reported).
  };

  // CPU frequency-scaling state; UNKNOWN when it cannot be determined.
  enum Scaling {
    UNKNOWN,
    ENABLED,
    DISABLED
  };

  int num_cpus;
  Scaling scaling;
  double cycles_per_second;
  std::vector<CacheInfo> caches;
  std::vector<double> load_avg;

  // Singleton accessor; the info is collected on first use.
  static const CPUInfo& Get();

 private:
  CPUInfo();
  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CPUInfo);
};
1461
1462// Adding Struct for System Information
1463struct SystemInfo {
1464 std::string name;
1465 static const SystemInfo& Get();
1466
1467 private:
1468 SystemInfo();
1469 BENCHMARK_DISALLOW_COPY_AND_ASSIGN(SystemInfo);
1470};
1471
// BenchmarkName contains the components of the Benchmark's name
// which allows individual fields to be modified or cleared before
// building the final name using 'str()'.
struct BenchmarkName {
  std::string function_name;
  std::string args;
  std::string min_time;
  std::string iterations;
  std::string repetitions;
  std::string time_type;
  std::string threads;

  // Return the full name of the benchmark with each non-empty
  // field separated by a '/'
  std::string str() const;
};
1488
// Interface for custom benchmark result printers.
// By default, benchmark reports are printed to stdout. However an application
// can control the destination of the reports by calling
// RunSpecifiedBenchmarks and passing it a custom reporter object.
// The reporter object must implement the following interface.
class BenchmarkReporter {
 public:
  // Environment information passed once to ReportContext() before any runs.
  struct Context {
    CPUInfo const& cpu_info;
    SystemInfo const& sys_info;
    // The number of chars in the longest benchmark name.
    size_t name_field_width;
    static const char* executable_name;
    Context();
  };

  // The result of a single benchmark run (or of an aggregate such as a
  // mean/stddev entry — see run_type).
  struct Run {
    static const int64_t no_repetition_index = -1;
    enum RunType { RT_Iteration, RT_Aggregate };

    Run()
        : run_type(RT_Iteration),
          aggregate_unit(kTime),
          error_occurred(false),
          iterations(1),
          threads(1),
          time_unit(kNanosecond),
          real_accumulated_time(0),
          cpu_accumulated_time(0),
          max_heapbytes_used(0),
          complexity(oNone),
          complexity_lambda(),
          complexity_n(0),
          report_big_o(false),
          report_rms(false),
          counters(),
          memory_result(NULL),
          allocs_per_iter(0.0) {}

    std::string benchmark_name() const;
    BenchmarkName run_name;
    int64_t family_index;
    int64_t per_family_instance_index;
    RunType run_type;
    std::string aggregate_name;
    StatisticUnit aggregate_unit;
    std::string report_label;  // Empty if not set by benchmark.
    bool error_occurred;
    std::string error_message;

    IterationCount iterations;
    int64_t threads;
    int64_t repetition_index;
    int64_t repetitions;
    TimeUnit time_unit;
    double real_accumulated_time;
    double cpu_accumulated_time;

    // Return a value representing the real time per iteration in the unit
    // specified by 'time_unit'.
    // NOTE: If 'iterations' is zero the returned value represents the
    // accumulated time.
    double GetAdjustedRealTime() const;

    // Return a value representing the cpu time per iteration in the unit
    // specified by 'time_unit'.
    // NOTE: If 'iterations' is zero the returned value represents the
    // accumulated time.
    double GetAdjustedCPUTime() const;

    // This is set to 0.0 if memory tracing is not enabled.
    double max_heapbytes_used;

    // Keep track of arguments to compute asymptotic complexity
    BigO complexity;
    BigOFunc* complexity_lambda;
    int64_t complexity_n;

    // what statistics to compute from the measurements
    const std::vector<internal::Statistics>* statistics;

    // Inform print function whether the current run is a complexity report
    bool report_big_o;
    bool report_rms;

    UserCounters counters;

    // Memory metrics.
    const MemoryManager::Result* memory_result;
    double allocs_per_iter;
  };

  // Bookkeeping for all runs belonging to one benchmark family.
  struct PerFamilyRunReports {
    PerFamilyRunReports() : num_runs_total(0), num_runs_done(0) {}

    // How many runs will all instances of this benchmark perform?
    int num_runs_total;

    // How many runs have happened already?
    int num_runs_done;

    // The reports about (non-erroneous!) runs of this family.
    std::vector<BenchmarkReporter::Run> Runs;
  };

  // Construct a BenchmarkReporter with the output stream set to 'std::cout'
  // and the error stream set to 'std::cerr'
  BenchmarkReporter();

  // Called once for every suite of benchmarks run.
  // The parameter "context" contains information that the
  // reporter may wish to use when generating its report, for example the
  // platform under which the benchmarks are running. The benchmark run is
  // never started if this function returns false, allowing the reporter
  // to skip runs based on the context information.
  virtual bool ReportContext(const Context& context) = 0;

  // Called once for each group of benchmark runs, gives information about
  // cpu-time and heap memory usage during the benchmark run. If the group
  // of runs contained more than two entries then 'report' contains additional
  // elements representing the mean and standard deviation of those runs.
  // Additionally if this group of runs was the last in a family of benchmarks
  // 'reports' contains additional entries representing the asymptotic
  // complexity and RMS of that benchmark family.
  virtual void ReportRuns(const std::vector<Run>& report) = 0;

  // Called once and only once after every group of benchmarks is run and
  // reported.
  virtual void Finalize() {}

  // REQUIRES: The object referenced by 'out' is valid for the lifetime
  // of the reporter.
  void SetOutputStream(std::ostream* out) {
    assert(out);
    output_stream_ = out;
  }

  // REQUIRES: The object referenced by 'err' is valid for the lifetime
  // of the reporter.
  void SetErrorStream(std::ostream* err) {
    assert(err);
    error_stream_ = err;
  }

  std::ostream& GetOutputStream() const { return *output_stream_; }

  std::ostream& GetErrorStream() const { return *error_stream_; }

  virtual ~BenchmarkReporter();

  // Write a human readable string to 'out' representing the specified
  // 'context'.
  // REQUIRES: 'out' is non-null.
  static void PrintBasicContext(std::ostream* out, Context const& context);

 private:
  std::ostream* output_stream_;
  std::ostream* error_stream_;
};
1648
// Simple reporter that outputs benchmark data to the console. This is the
// default reporter used by RunSpecifiedBenchmarks().
class ConsoleReporter : public BenchmarkReporter {
 public:
  // Output flags; combine with bitwise-or.
  enum OutputOptions {
    OO_None = 0,
    OO_Color = 1,
    OO_Tabular = 2,
    OO_ColorTabular = OO_Color | OO_Tabular,
    OO_Defaults = OO_ColorTabular
  };
  explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults)
      : output_options_(opts_),
        name_field_width_(0),
        prev_counters_(),
        printed_header_(false) {}

  virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
  virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;

 protected:
  virtual void PrintRunData(const Run& report);
  virtual void PrintHeader(const Run& report);

  OutputOptions output_options_;
  size_t name_field_width_;      // Width of the name column in the output.
  UserCounters prev_counters_;   // Counters from the previous run printed.
  bool printed_header_;          // True once the table header has been emitted.
};
1678
// Reporter that writes benchmark results in JSON format.
class JSONReporter : public BenchmarkReporter {
 public:
  JSONReporter() : first_report_(true) {}
  virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
  virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
  virtual void Finalize() BENCHMARK_OVERRIDE;

 private:
  void PrintRunData(const Run& report);

  // True until the first run has been reported; presumably used to manage
  // separators between entries — see implementation.
  bool first_report_;
};
1691
// Reporter that writes benchmark results in CSV format. Deprecated.
class BENCHMARK_DEPRECATED_MSG(
    "The CSV Reporter will be removed in a future release") CSVReporter
    : public BenchmarkReporter {
 public:
  CSVReporter() : printed_header_(false) {}
  virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
  virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;

 private:
  void PrintRunData(const Run& report);

  bool printed_header_;                        // CSV header emitted yet?
  std::set<std::string> user_counter_names_;   // Counter columns seen so far.
};
1706
1707
1708inline const char* GetTimeUnitString(TimeUnit unit) {
1709 switch (unit) {
1710 case kSecond:
1711 return "s";
1712 case kMillisecond:
1713 return "ms";
1714 case kMicrosecond:
1715 return "us";
1716 case kNanosecond:
1717 return "ns";
1718 }
1719 BENCHMARK_UNREACHABLE();
1720}
1721
1722inline double GetTimeUnitMultiplier(TimeUnit unit) {
1723 switch (unit) {
1724 case kSecond:
1725 return 1;
1726 case kMillisecond:
1727 return 1e3;
1728 case kMicrosecond:
1729 return 1e6;
1730 case kNanosecond:
1731 return 1e9;
1732 }
1733 BENCHMARK_UNREACHABLE();
1734}
1735
1736// Creates a list of integer values for the given range and multiplier.
1737// This can be used together with ArgsProduct() to allow multiple ranges
// with different multipliers.
1739// Example:
1740// ArgsProduct({
1741// CreateRange(0, 1024, /*multi=*/32),
1742// CreateRange(0, 100, /*multi=*/4),
1743// CreateDenseRange(0, 4, /*step=*/1),
1744// });
1745std::vector<int64_t> CreateRange(int64_t lo, int64_t hi, int multi);
1746
1747// Creates a list of integer values for the given range and step.
1748std::vector<int64_t> CreateDenseRange(int64_t start, int64_t limit,
1749 int step);
1750
1751} // namespace benchmark
1752
1753#endif // BENCHMARK_BENCHMARK_H_
1754

// source code of flutter_engine/third_party/benchmark/include/benchmark/benchmark.h