Files
android_external_libcxx/utils/google-benchmark/test/benchmark_test.cc
Eric Fiselier d9b9ef75a8 [libcxx] Add support for benchmark tests using Google Benchmark.
Summary:
This patch does the following:

1. Checks in a copy of the Google Benchmark library into the libc++ repo under `utils/google-benchmark`.
2. Teaches libc++ how to build Google Benchmark against both (A) in-tree libc++ and (B) the platforms native STL.
3. Allows performance benchmarks to be built as part of the libc++ build.

Building the benchmarks (and Google Benchmark) is off by default. It must be enabled using the CMake option `-DLIBCXX_INCLUDE_BENCHMARKS=ON`. When this option is enabled the tests under `libcxx/benchmarks`  can be built using the `libcxx-benchmarks` target.

On Linux platforms where libstdc++ is the default STL the CMake option `-DLIBCXX_BUILD_BENCHMARKS_NATIVE_STDLIB=ON` can be used to build each benchmark test against libstdc++ as well. This is useful for comparing performance between standard libraries.

Support for benchmarks is currently very minimal. They must be manually run by the user and there is no mechanism for detecting performance regressions.

Known Issues:

* `-DLIBCXX_INCLUDE_BENCHMARKS=ON` is only supported for Clang, and not GCC, since the `-stdlib=libc++` option is needed to build Google Benchmark.








Reviewers: danalbert, dberlin, chandlerc, mclow.lists, jroelofs

Subscribers: chandlerc, dberlin, tberghammer, danalbert, srhines, hfinkel

Differential Revision: https://reviews.llvm.org/D22240

git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@276049 91177308-0d34-0410-b5e6-96231b3b80d8
2016-07-19 23:07:03 +00:00

225 lines
6.2 KiB
C++

#include "benchmark/benchmark.h"
#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <list>
#include <map>
#include <mutex>
#include <set>
#include <sstream>
#include <string>
#include <vector>
#include <chrono>
#include <thread>
#include <utility>
#if defined(__GNUC__)
# define BENCHMARK_NOINLINE __attribute__((noinline))
#else
# define BENCHMARK_NOINLINE
#endif
namespace {
int BENCHMARK_NOINLINE Factorial(uint32_t n) {
return (n == 1) ? 1 : n * Factorial(n - 1);
}
double CalculatePi(int depth) {
double pi = 0.0;
for (int i = 0; i < depth; ++i) {
double numerator = static_cast<double>(((i % 2) * 2) - 1);
double denominator = static_cast<double>((2 * i) - 1);
pi += numerator / denominator;
}
return (pi - 1.0) * 4;
}
std::set<int> ConstructRandomSet(int size) {
std::set<int> s;
for (int i = 0; i < size; ++i)
s.insert(i);
return s;
}
std::mutex test_vector_mu;
std::vector<int>* test_vector = nullptr;
} // end namespace
static void BM_Factorial(benchmark::State& state) {
int fac_42 = 0;
while (state.KeepRunning())
fac_42 = Factorial(8);
// Prevent compiler optimizations
std::stringstream ss;
ss << fac_42;
state.SetLabel(ss.str());
}
BENCHMARK(BM_Factorial);
BENCHMARK(BM_Factorial)->UseRealTime();
static void BM_CalculatePiRange(benchmark::State& state) {
double pi = 0.0;
while (state.KeepRunning())
pi = CalculatePi(state.range_x());
std::stringstream ss;
ss << pi;
state.SetLabel(ss.str());
}
BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024);
static void BM_CalculatePi(benchmark::State& state) {
static const int depth = 1024;
while (state.KeepRunning()) {
benchmark::DoNotOptimize(CalculatePi(depth));
}
}
BENCHMARK(BM_CalculatePi)->Threads(8);
BENCHMARK(BM_CalculatePi)->ThreadRange(1, 32);
BENCHMARK(BM_CalculatePi)->ThreadPerCpu();
static void BM_SetInsert(benchmark::State& state) {
while (state.KeepRunning()) {
state.PauseTiming();
std::set<int> data = ConstructRandomSet(state.range_x());
state.ResumeTiming();
for (int j = 0; j < state.range_y(); ++j)
data.insert(rand());
}
state.SetItemsProcessed(state.iterations() * state.range_y());
state.SetBytesProcessed(state.iterations() * state.range_y() * sizeof(int));
}
BENCHMARK(BM_SetInsert)->RangePair(1<<10,8<<10, 1,10);
template<typename Container, typename ValueType = typename Container::value_type>
static void BM_Sequential(benchmark::State& state) {
ValueType v = 42;
while (state.KeepRunning()) {
Container c;
for (int i = state.range_x(); --i; )
c.push_back(v);
}
const size_t items_processed = state.iterations() * state.range_x();
state.SetItemsProcessed(items_processed);
state.SetBytesProcessed(items_processed * sizeof(v));
}
BENCHMARK_TEMPLATE2(BM_Sequential, std::vector<int>, int)->Range(1 << 0, 1 << 10);
BENCHMARK_TEMPLATE(BM_Sequential, std::list<int>)->Range(1 << 0, 1 << 10);
// Test the variadic version of BENCHMARK_TEMPLATE in C++11 and beyond.
#if __cplusplus >= 201103L
BENCHMARK_TEMPLATE(BM_Sequential, std::vector<int>, int)->Arg(512);
#endif
static void BM_StringCompare(benchmark::State& state) {
std::string s1(state.range_x(), '-');
std::string s2(state.range_x(), '-');
while (state.KeepRunning())
benchmark::DoNotOptimize(s1.compare(s2));
}
BENCHMARK(BM_StringCompare)->Range(1, 1<<20);
static void BM_SetupTeardown(benchmark::State& state) {
if (state.thread_index == 0) {
// No need to lock test_vector_mu here as this is running single-threaded.
test_vector = new std::vector<int>();
}
int i = 0;
while (state.KeepRunning()) {
std::lock_guard<std::mutex> l(test_vector_mu);
if (i%2 == 0)
test_vector->push_back(i);
else
test_vector->pop_back();
++i;
}
if (state.thread_index == 0) {
delete test_vector;
}
}
BENCHMARK(BM_SetupTeardown)->ThreadPerCpu();
static void BM_LongTest(benchmark::State& state) {
double tracker = 0.0;
while (state.KeepRunning()) {
for (int i = 0; i < state.range_x(); ++i)
benchmark::DoNotOptimize(tracker += i);
}
}
BENCHMARK(BM_LongTest)->Range(1<<16,1<<28);
static void BM_ParallelMemset(benchmark::State& state) {
int size = state.range_x() / sizeof(int);
int thread_size = size / state.threads;
int from = thread_size * state.thread_index;
int to = from + thread_size;
if (state.thread_index == 0) {
test_vector = new std::vector<int>(size);
}
while (state.KeepRunning()) {
for (int i = from; i < to; i++) {
// No need to lock test_vector_mu as ranges
// do not overlap between threads.
benchmark::DoNotOptimize(test_vector->at(i) = 1);
}
}
if (state.thread_index == 0) {
delete test_vector;
}
}
BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4);
static void BM_ManualTiming(benchmark::State& state) {
size_t slept_for = 0;
int microseconds = state.range_x();
std::chrono::duration<double, std::micro> sleep_duration {
static_cast<double>(microseconds)
};
while (state.KeepRunning()) {
auto start = std::chrono::high_resolution_clock::now();
// Simulate some useful workload with a sleep
std::this_thread::sleep_for(std::chrono::duration_cast<
std::chrono::nanoseconds>(sleep_duration));
auto end = std::chrono::high_resolution_clock::now();
auto elapsed =
std::chrono::duration_cast<std::chrono::duration<double>>(
end - start);
state.SetIterationTime(elapsed.count());
slept_for += microseconds;
}
state.SetItemsProcessed(slept_for);
}
BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseRealTime();
BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseManualTime();
#if __cplusplus >= 201103L
template <class ...Args>
void BM_with_args(benchmark::State& state, Args&&...) {
while (state.KeepRunning()) {}
}
BENCHMARK_CAPTURE(BM_with_args, int_test, 42, 43, 44);
BENCHMARK_CAPTURE(BM_with_args, string_and_pair_test,
std::string("abc"), std::pair<int, double>(42, 3.8));
void BM_non_template_args(benchmark::State& state, int, double) {
while(state.KeepRunning()) {}
}
BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0);
#endif // __cplusplus >= 201103L
BENCHMARK_MAIN()