[libcxx] Add support for benchmark tests using Google Benchmark.

Summary: This patch does the following: 1. Checks in a copy of the Google Benchmark library into the libc++ repo under `utils/google-benchmark`. 2. Teaches libc++ how to build Google Benchmark against both (A) in-tree libc++ and (B) the platform's native STL. 3. Allows performance benchmarks to be built as part of the libc++ build. Building the benchmarks (and Google Benchmark) is off by default. It must be enabled using the CMake option `-DLIBCXX_INCLUDE_BENCHMARKS=ON`. When this option is enabled the tests under `libcxx/benchmarks` can be built using the `libcxx-benchmarks` target. On Linux platforms where libstdc++ is the default STL the CMake option `-DLIBCXX_BUILD_BENCHMARK_NATIVE_STDLIB=ON` can be used to build each benchmark test against libstdc++ as well. This is useful for comparing performance between standard libraries. Support for benchmarks is currently very minimal. They must be manually run by the user and there is no mechanism for detecting performance regressions. Known Issues: * `-DLIBCXX_INCLUDE_BENCHMARKS=ON` is only supported for Clang, and not GCC, since the `-stdlib=libc++` option is needed to build Google Benchmark. Reviewers: danalbert, dberlin, chandlerc, mclow.lists, jroelofs Subscribers: chandlerc, dberlin, tberghammer, danalbert, srhines, hfinkel Differential Revision: https://reviews.llvm.org/D22240 git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@276049 91177308-0d34-0410-b5e6-96231b3b80d8
2016-07-19 23:07:03 +00:00
parent 7310ec83f3
commit d9b9ef75a8
73 changed files with 9310 additions and 31 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -56,6 +56,8 @@ option(LIBCXX_ENABLE_EXPERIMENTAL_LIBRARY "Build libc++experimental.a" ON)
 option(LIBCXX_ENABLE_FILESYSTEM
        "Build filesystem as part of libc++experimental.a" ${LIBCXX_ENABLE_EXPERIMENTAL_LIBRARY})
 option(LIBCXX_INCLUDE_TESTS "Build the libc++ tests." ${LLVM_INCLUDE_TESTS})
 # Benchmarks are opt-in; enabling them adds the benchmarks/ subdirectory to the
 # build.
 option(LIBCXX_INCLUDE_BENCHMARKS "Build the libc++ benchmarks and their dependencies" OFF)
 # When ON, each benchmark is additionally built against the platform's native
 # standard library so results can be compared.
 option(LIBCXX_BUILD_BENCHMARK_NATIVE_STDLIB "Build the benchmarks against the native STL" OFF)
 option(LIBCXX_INCLUDE_DOCS "Build the libc++ documentation." ${LLVM_INCLUDE_DOCS})
 set(LIBCXX_LIBDIR_SUFFIX "${LLVM_LIBDIR_SUFFIX}" CACHE STRING
    "Define suffix of library directory name (32/64)")
@@ -426,6 +428,9 @@ include_directories(include)
 add_subdirectory(include)
 add_subdirectory(lib)
 # Only descend into benchmarks/ when the user opted in with
 # -DLIBCXX_INCLUDE_BENCHMARKS=ON.
 if (LIBCXX_INCLUDE_BENCHMARKS)
  add_subdirectory(benchmarks)
 endif()
 if (LIBCXX_INCLUDE_TESTS)
  add_subdirectory(test)
 endif()
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
@@ -0,0 +1,119 @@
 include(ExternalProject)
 include(CheckCXXCompilerFlag)
 #==============================================================================
 # Build Google Benchmark for libc++
 #==============================================================================
 # Google Benchmark is compiled below with -stdlib=libc++, so stop the
 # configuration immediately when the compiler rejects that flag.
 # NOTE: despite the "HAS_NO" in its name, the result variable is TRUE when the
 # flag IS accepted.
 check_cxx_compiler_flag(-stdlib=libc++ LIBCXX_HAS_NO_STDLIB_LIBCXX_FLAG)
 if (NOT LIBCXX_HAS_NO_STDLIB_LIBCXX_FLAG)
  # FATAL_ERROR (not FATAL) is the message mode that aborts processing.
  message(FATAL_ERROR "Benchmark requires support for the -stdlib=libc++ flag")
 endif()
 # Compile flags handed to the Google Benchmark sub-build so that it is built
 # against the in-tree libc++ headers.
 set(BENCHMARK_LIBCXX_COMPILE_FLAGS
     -Wno-unused-command-line-argument
     -nostdinc++
     -cxx-isystem ${LIBCXX_SOURCE_DIR}/include
     -stdlib=libc++)
 split_list(BENCHMARK_LIBCXX_COMPILE_FLAGS)
 # Link flags handed to the same sub-build: search for, and embed an rpath to,
 # the libc++ library directory.
 set(BENCHMARK_LIBCXX_LINK_FLAGS
     -L${LIBCXX_LIBRARY_DIR}
     -Wl,-rpath,${LIBCXX_LIBRARY_DIR})
 split_list(BENCHMARK_LIBCXX_LINK_FLAGS)
 # Configure, build and install the bundled Google Benchmark sources against the
 # in-tree libc++ (hence DEPENDS cxx). EXCLUDE_FROM_ALL keeps it out of the
 # default build.
 #
 # The linker flags must be passed through CMAKE_SHARED_LINKER_FLAGS and
 # CMAKE_EXE_LINKER_FLAGS; the "*_LINK_FLAGS" spelling is not a CMake variable,
 # so flags set under that name are never applied.
 ExternalProject_Add(google-benchmark-libcxx
        EXCLUDE_FROM_ALL ON
        DEPENDS cxx
        PREFIX benchmark-libcxx
        SOURCE_DIR ${LIBCXX_SOURCE_DIR}/utils/google-benchmark
        INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/benchmark-libcxx
        CMAKE_CACHE_DEFAULT_ARGS
          -DCMAKE_BUILD_TYPE:STRING=RELEASE
          -DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR>
          -DCMAKE_CXX_FLAGS:STRING=${BENCHMARK_LIBCXX_COMPILE_FLAGS}
          -DCMAKE_SHARED_LINKER_FLAGS:STRING=${BENCHMARK_LIBCXX_LINK_FLAGS}
          -DCMAKE_EXE_LINKER_FLAGS:STRING=${BENCHMARK_LIBCXX_LINK_FLAGS}
          -DBENCHMARK_ENABLE_TESTING:BOOL=OFF)
 #==============================================================================
 # Build Google Benchmark for the native stdlib
 #==============================================================================
 # Optional second build of the bundled Google Benchmark sources. Unlike the
 # libc++ variant above, no libc++-specific compile or link flags are passed, so
 # the compiler's default standard library is used.
 if (LIBCXX_BUILD_BENCHMARK_NATIVE_STDLIB)
  ExternalProject_Add(google-benchmark-native
        EXCLUDE_FROM_ALL ON
        PREFIX benchmark-native
        SOURCE_DIR ${LIBCXX_SOURCE_DIR}/utils/google-benchmark
        INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/benchmark-native
        CMAKE_CACHE_ARGS
          -DBENCHMARK_ENABLE_TESTING:BOOL=OFF
        CMAKE_CACHE_DEFAULT_ARGS
          -DCMAKE_BUILD_TYPE:STRING=RELEASE
          -DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR>)
 endif()
 #==============================================================================
 # Benchmark tests configuration
 #==============================================================================
 # Umbrella target: every benchmark executable is attached to it as a dependency
 # by add_benchmark_test().
 add_custom_target(libcxx-benchmarks)
 # Install prefixes of the two Google Benchmark sub-builds (these match the
 # INSTALL_DIR values given to ExternalProject_Add).
 set(BENCHMARK_LIBCXX_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/benchmark-libcxx)
 set(BENCHMARK_NATIVE_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/benchmark-native)
 # Flags shared by both flavours of each benchmark. Note that the include path
 # always points at the libc++ install of Google Benchmark.
 set(BENCHMARK_TEST_COMPILE_FLAGS
    -std=c++14 -O2
    -I${BENCHMARK_LIBCXX_INSTALL}/include
 )
 # Extra compile flags for the libc++ flavour; the shared flags are spliced in
 # while they are still a list (i.e. before split_list() runs below).
 set(BENCHMARK_TEST_LIBCXX_COMPILE_FLAGS
    -nostdinc++
    ${BENCHMARK_TEST_COMPILE_FLAGS}
    -Wno-user-defined-literals
 )
 set(BENCHMARK_TEST_LIBCXX_LINK_FLAGS
    -nodefaultlibs
    -L${BENCHMARK_LIBCXX_INSTALL}/lib/
 )
 set(BENCHMARK_TEST_NATIVE_LINK_FLAGS
    -L${BENCHMARK_NATIVE_INSTALL}/lib/
 )
 # The resulting values are used as COMPILE_FLAGS / LINK_FLAGS target
 # properties; split_list() is defined elsewhere — presumably it joins each list
 # into one space-separated string (verify against its definition).
 split_list(BENCHMARK_TEST_COMPILE_FLAGS)
 split_list(BENCHMARK_TEST_LIBCXX_COMPILE_FLAGS)
 split_list(BENCHMARK_TEST_LIBCXX_LINK_FLAGS)
 split_list(BENCHMARK_TEST_NATIVE_LINK_FLAGS)
 # add_benchmark_test(<name> <source_file>)
 #
 # Creates the executable target <name>_libcxx (output "<name>.libcxx.out")
 # linked against the in-tree libc++ and its Google Benchmark build. When
 # LIBCXX_BUILD_BENCHMARK_NATIVE_STDLIB is ON it also creates <name>_native
 # (output "<name>.native.out"). Both are excluded from the default build and
 # hooked into the libcxx-benchmarks umbrella target.
 macro(add_benchmark_test name source_file)
  # --- libc++ flavour ---
  set(libcxx_target ${name}_libcxx)
  add_executable(${libcxx_target} EXCLUDE_FROM_ALL ${source_file})
  set_target_properties(${libcxx_target}
    PROPERTIES
          OUTPUT_NAME "${name}.libcxx.out"
          COMPILE_FLAGS "${BENCHMARK_TEST_LIBCXX_COMPILE_FLAGS}"
          LINK_FLAGS "${BENCHMARK_TEST_LIBCXX_LINK_FLAGS}")
  target_link_libraries(${libcxx_target} cxx -lbenchmark)
  add_dependencies(${libcxx_target} cxx google-benchmark-libcxx)
  add_dependencies(libcxx-benchmarks ${libcxx_target})

  # --- native standard library flavour (optional) ---
  if (LIBCXX_BUILD_BENCHMARK_NATIVE_STDLIB)
    set(native_target ${name}_native)
    add_executable(${native_target} EXCLUDE_FROM_ALL ${source_file})
    # INCLUDE_DIRECTORIES is emptied so no directory-level include paths are
    # applied to the native build.
    set_target_properties(${native_target}
      PROPERTIES
          OUTPUT_NAME "${name}.native.out"
          INCLUDE_DIRECTORIES ""
          COMPILE_FLAGS "${BENCHMARK_TEST_COMPILE_FLAGS}"
          LINK_FLAGS "${BENCHMARK_TEST_NATIVE_LINK_FLAGS}")
    target_link_libraries(${native_target} -lbenchmark)
    if (LIBCXX_HAS_PTHREAD_LIB)
      target_link_libraries(${native_target} -pthread)
    endif()
    add_dependencies(${native_target} google-benchmark-native)
    add_dependencies(libcxx-benchmarks ${native_target})
  endif()
 endmacro()
 #==============================================================================
 # Register Benchmark tests
 #==============================================================================
 # Every "<name>.bench.cpp" in this directory becomes a benchmark called <name>.
 file(GLOB BENCHMARK_TESTS "*.bench.cpp")
 foreach(bench_source_path ${BENCHMARK_TESTS})
  # Strip the directory, then the ".bench.cpp" suffix, to get the test name.
  get_filename_component(test_file "${bench_source_path}" NAME)
  string(REPLACE ".bench.cpp" "" test_name "${test_file}")
  message(STATUS "TEST: ${test_file}")
  add_benchmark_test(${test_name} ${test_file})
 endforeach()
--- a/benchmarks/ContainerBenchmarks.hpp
+++ b/benchmarks/ContainerBenchmarks.hpp
@@ -0,0 +1,69 @@
 #ifndef BENCHMARK_CONTAINER_BENCHMARKS_HPP
 #define BENCHMARK_CONTAINER_BENCHMARKS_HPP
 #include <cassert>
 #include "benchmark/benchmark_api.h"
 namespace ContainerBenchmarks {
 // Benchmarks inserting every generated value into `c`, one at a time.
 // `gen` is called once with st.range_x() to produce the inputs; the container
 // is cleared at the start of every benchmark iteration.
 template <class Container, class GenInputs>
 void BM_InsertValue(benchmark::State& st, Container c, GenInputs gen) {
    auto inputs = gen(st.range_x());
    const auto last = inputs.end();
    while (st.KeepRunning()) {
        c.clear();
        auto cur = inputs.begin();
        while (cur != last) {
            // Take the address of the inserted element so the insert
            // cannot be optimized away.
            benchmark::DoNotOptimize(&(*c.insert(*cur).first));
            ++cur;
        }
        benchmark::ClobberMemory();
    }
 }
 // Same as a plain insert benchmark, except that after clearing the container
 // each iteration also calls c.rehash(16) before the inserts begin.
 template <class Container, class GenInputs>
 void BM_InsertValueRehash(benchmark::State& st, Container c, GenInputs gen) {
    auto inputs = gen(st.range_x());
    const auto last = inputs.end();
    while (st.KeepRunning()) {
        c.clear();
        c.rehash(16);
        auto cur = inputs.begin();
        while (cur != last) {
            benchmark::DoNotOptimize(&(*c.insert(*cur).first));
            ++cur;
        }
        benchmark::ClobberMemory();
    }
 }
 // Benchmarks looking up every generated value in `c`. The container is filled
 // with all inputs once, before timing starts, so every find() succeeds.
 template <class Container, class GenInputs>
 static void BM_Find(benchmark::State& st, Container c, GenInputs gen) {
    auto inputs = gen(st.range_x());
    c.insert(inputs.begin(), inputs.end());
    benchmark::DoNotOptimize(&(*c.begin()));
    // Walk the inputs through raw pointers into the contiguous storage.
    const auto first = inputs.data();
    const auto last = first + inputs.size();
    while (st.KeepRunning()) {
        for (auto cur = first; cur != last; ++cur) {
            benchmark::DoNotOptimize(&(*c.find(*cur)));
        }
        benchmark::ClobberMemory();
    }
 }
 // Lookup benchmark that calls c.rehash(8) on the (still empty) container
 // before it is filled with the generated inputs.
 template <class Container, class GenInputs>
 static void BM_FindRehash(benchmark::State& st, Container c, GenInputs gen) {
    c.rehash(8);
    auto inputs = gen(st.range_x());
    c.insert(inputs.begin(), inputs.end());
    benchmark::DoNotOptimize(&(*c.begin()));
    const auto first = inputs.data();
    const auto last = first + inputs.size();
    while (st.KeepRunning()) {
        for (auto cur = first; cur != last; ++cur) {
            benchmark::DoNotOptimize(&(*c.find(*cur)));
        }
        benchmark::ClobberMemory();
    }
 }
 } // end namespace ContainerBenchmarks
 #endif // BENCHMARK_CONTAINER_BENCHMARKS_HPP
--- a/benchmarks/GenerateInput.hpp
+++ b/benchmarks/GenerateInput.hpp
@@ -0,0 +1,133 @@
 #ifndef BENCHMARK_GENERATE_INPUT_HPP
 #define BENCHMARK_GENERATE_INPUT_HPP
 #include <algorithm>
 #include <random>
 #include <vector>
 #include <string>
 #include <climits>
 #include <cstddef>
 // Alphabet used for random strings: the digits 0-9 followed by the upper- and
 // lower-case ASCII letters.
 static const char Letters[] = {
    '0','1','2','3','4',
    '5','6','7','8','9',
    'A','B','C','D','E','F',
    'G','H','I','J','K',
    'L','M','N','O','P',
    'Q','R','S','T','U',
    'V','W','X','Y','Z',
    'a','b','c','d','e','f',
    'g','h','i','j','k',
    'l','m','n','o','p',
    'q','r','s','t','u',
    'v','w','x','y','z'
 };
 // Number of entries in Letters (the array holds chars, so sizeof is the count).
 static const std::size_t LettersSize = sizeof(Letters);
 // Returns the single shared random engine. It is created on first use and
 // seeded once from std::random_device.
 inline std::default_random_engine& getRandomEngine() {
    static std::default_random_engine SharedEngine = [] {
        std::random_device SeedSource;
        return std::default_random_engine(SeedSource());
    }();
    return SharedEngine;
 }
 inline char getRandomChar() {
    std::uniform_int_distribution<> LettersDist(0, LettersSize-1);
    return Letters[LettersDist(getRandomEngine())];
 }
 // Returns a random IntT drawn from a default-constructed
 // std::uniform_int_distribution<IntT>, using the shared engine.
 template <class IntT>
 inline IntT getRandomInteger() {
    std::uniform_int_distribution<IntT> DefaultRange;
    auto& Engine = getRandomEngine();
    return DefaultRange(Engine);
 }
 // Builds a string of exactly Len characters, each produced by getRandomChar().
 inline std::string getRandomString(std::size_t Len) {
    std::string Result(Len, '\0');
    for (char& Ch : Result)
        Ch = getRandomChar();
    return Result;
 }
 // Returns N copies of the same value: -1 converted to IntT.
 template <class IntT>
 inline std::vector<IntT> getDuplicateIntegerInputs(size_t N) {
    const IntT Repeated = static_cast<IntT>(-1);
    return std::vector<IntT>(N, Repeated);
 }
 // Returns the ascending sequence 0, 1, ..., N-1 (each converted to IntT).
 template <class IntT>
 inline std::vector<IntT> getSortedIntegerInputs(size_t N) {
    std::vector<IntT> inputs(N);
    size_t next = 0;
    for (auto& slot : inputs)
        slot = next++;
    return inputs;
 }
 // Returns the ascending sequence N, N+1, ..., 2N-1 (each converted to IntT).
 template <class IntT>
 std::vector<IntT> getSortedLargeIntegerInputs(size_t N) {
    std::vector<IntT> inputs(N);
    for (size_t idx = 0; idx != N; ++idx)
        inputs[idx] = idx + N;
    return inputs;
 }
 // Returns 0..N-1 with every value shifted left by half the bit width of IntT,
 // so the values differ only in their upper half.
 template <class IntT>
 std::vector<IntT> getSortedTopBitsIntegerInputs(size_t N) {
    const auto HalfWidth = (sizeof(IntT) / 2) * CHAR_BIT;
    std::vector<IntT> inputs = getSortedIntegerInputs<IntT>(N);
    std::for_each(inputs.begin(), inputs.end(),
                  [HalfWidth](IntT& Value) { Value <<= HalfWidth; });
    return inputs;
 }
 // Returns the descending sequence N-1, N-2, ..., 0 (each converted to IntT).
 template <class IntT>
 inline std::vector<IntT> getReverseSortedIntegerInputs(size_t N) {
    std::vector<IntT> inputs;
    for (std::size_t remaining = N; remaining != 0; --remaining)
        inputs.push_back(remaining - 1);
    return inputs;
 }
 // Returns a "pipe organ" sequence: it rises 0, 1, ..., N/2-1 and then falls
 // N-N/2, ..., 2, 1.
 template <class IntT>
 std::vector<IntT> getPipeOrganIntegerInputs(size_t N) {
    const size_t Half = N / 2;
    std::vector<IntT> v;
    v.reserve(N);
    size_t i = 0;
    for (; i < Half; ++i)
        v.push_back(i);      // rising part
    for (; i < N; ++i)
        v.push_back(N - i);  // falling part
    return v;
 }
 // Returns N values, each obtained from getRandomInteger<IntT>().
 template <class IntT>
 std::vector<IntT> getRandomIntegerInputs(size_t N) {
    std::vector<IntT> inputs;
    while (inputs.size() < N)
        inputs.push_back(getRandomInteger<IntT>());
    return inputs;
 }
 // Returns N copies of one random 1024-character string.
 inline std::vector<std::string> getDuplicateStringInputs(size_t N) {
    const std::string Repeated = getRandomString(1024);
    return std::vector<std::string>(N, Repeated);
 }
 // Returns N independently generated random strings of 1024 characters each.
 inline std::vector<std::string> getRandomStringInputs(size_t N) {
    std::vector<std::string> inputs;
    inputs.reserve(N);
    // The counter has the same type as N: an `int` counter mixes signed and
    // unsigned in the comparison and cannot represent large values of N.
    for (size_t i = 0; i < N; ++i) {
        inputs.push_back(getRandomString(1024));
    }
    return inputs;
 }
 // Returns N random strings sorted in ascending order.
 inline std::vector<std::string> getSortedStringInputs(size_t N) {
    auto Strings = getRandomStringInputs(N);
    std::sort(Strings.begin(), Strings.end());
    return Strings;
 }
 // Returns N random strings in descending order (the ascending result of
 // getSortedStringInputs, reversed).
 inline std::vector<std::string> getReverseSortedStringInputs(size_t N) {
    const std::vector<std::string> Ascending = getSortedStringInputs(N);
    return std::vector<std::string>(Ascending.rbegin(), Ascending.rend());
 }
 #endif // BENCHMARK_GENERATE_INPUT_HPP
--- a/benchmarks/algorithms.bench.cpp
+++ b/benchmarks/algorithms.bench.cpp
@@ -0,0 +1,62 @@
 #include <unordered_set>
 #include <vector>
 #include <cstdint>
 #include "benchmark/benchmark_api.h"
 #include "GenerateInput.hpp"
 constexpr std::size_t TestNumInputs = 1024;
 // Benchmarks std::sort on five copies of the generated input per iteration.
 // The copies are restored to their unsorted state between iterations with the
 // timer paused, so only the sorting itself is measured.
 template <class GenInputs>
 void BM_Sort(benchmark::State& st, GenInputs gen) {
    using ValueType = typename decltype(gen(0))::value_type;
    const auto Pristine = gen(st.range_x());
    std::vector<ValueType> Copies[5];
    auto RestoreCopies = [&]() {
        for (auto& Copy : Copies) {
            Copy = Pristine;
            benchmark::DoNotOptimize(Copy.data());
        }
    };
    RestoreCopies();
    while (st.KeepRunning()) {
        for (auto& Copy : Copies) {
            auto* const First = Copy.data();
            std::sort(First, First + Copy.size());
            benchmark::DoNotOptimize(Copy.data());
        }
        st.PauseTiming();
        RestoreCopies();
        benchmark::ClobberMemory();
        st.ResumeTiming();
    }
 }
 // Register BM_Sort once per input shape. Each registration passes the input
 // generator as the captured argument and TestNumInputs as the benchmark range.
 // 32-bit unsigned integer inputs:
 BENCHMARK_CAPTURE(BM_Sort, random_uint32,
    getRandomIntegerInputs<uint32_t>)->Arg(TestNumInputs);
 BENCHMARK_CAPTURE(BM_Sort, sorted_ascending_uint32,
    getSortedIntegerInputs<uint32_t>)->Arg(TestNumInputs);
 BENCHMARK_CAPTURE(BM_Sort, sorted_descending_uint32,
    getReverseSortedIntegerInputs<uint32_t>)->Arg(TestNumInputs);
 BENCHMARK_CAPTURE(BM_Sort, single_element_uint32,
    getDuplicateIntegerInputs<uint32_t>)->Arg(TestNumInputs);
 BENCHMARK_CAPTURE(BM_Sort, pipe_organ_uint32,
    getPipeOrganIntegerInputs<uint32_t>)->Arg(TestNumInputs);
 // String inputs:
 BENCHMARK_CAPTURE(BM_Sort, random_strings,
    getRandomStringInputs)->Arg(TestNumInputs);
 BENCHMARK_CAPTURE(BM_Sort, sorted_ascending_strings,
    getSortedStringInputs)->Arg(TestNumInputs);
 BENCHMARK_CAPTURE(BM_Sort, sorted_descending_strings,
    getReverseSortedStringInputs)->Arg(TestNumInputs);
 BENCHMARK_CAPTURE(BM_Sort, single_element_strings,
    getDuplicateStringInputs)->Arg(TestNumInputs);
 BENCHMARK_MAIN()
--- a/benchmarks/unordered_set_operations.bench.cpp
+++ b/benchmarks/unordered_set_operations.bench.cpp
@@ -1,44 +1,268 @@
 #include <unordered_set>
 #include <vector>
 #include <functional>
 #include <cstdint>
 #include <cstdlib>
 #include <cstring>
 #include "benchmark/benchmark_api.h"
-template <class IntT>
+#include "ContainerBenchmarks.hpp"
-std::vector<IntT> getInputs(size_t N) {
+#include "GenerateInput.hpp"
-    std::vector<IntT> inputs;
+
-    for (size_t i=0; i < N; ++i) {
+using namespace ContainerBenchmarks;
-        inputs.push_back(i);
+
-    }
+constexpr std::size_t TestNumInputs = 1024;
-    return inputs;
+
 // Reads a value of type WordT from the (possibly unaligned) address `src` by
 // copying its bytes with memcpy.
 template <class WordT>
 inline __attribute__((__always_inline__))
 WordT loadword(const void* src) {
    WordT word;
    std::memcpy(&word, src, sizeof(WordT));
    return word;
 }
-template <class Container, class Inputs>
+inline __attribute__((__always_inline__))
-void BM_SetInsert(benchmark::State& st, Container c, Inputs const& in) {
+std::size_t rotate_by_at_least_1(std::size_t __val, int __shift) {
-    const auto end = in.end();
+    return (__val >> __shift) | (__val << (64 - __shift));
 }
 // Mixes two words into one hash value with a multiply / xor-shift sequence.
 inline __attribute__((__always_inline__))
 std::size_t hash_len_16(std::size_t __u, std::size_t __v) {
    const std::size_t Mul = 0x9ddfea08eb382d69ULL;
    std::size_t First = (__u ^ __v) * Mul;
    First ^= First >> 47;
    std::size_t Second = (__v ^ First) * Mul;
    Second ^= Second >> 47;
    return Second * Mul;
 }
 // Hashes a 4- or 8-byte buffer: loads the first and the last 32-bit word of
 // the buffer and mixes them with hash_len_16.
 template <std::size_t _Len>
 inline __attribute__((__always_inline__))
 std::size_t hash_len_0_to_8(const char* __s) {
    static_assert(_Len == 4 || _Len == 8, "");
    const uint64_t Head = loadword<uint32_t>(__s);
    const uint64_t Tail = loadword<uint32_t>(__s + _Len - 4);
    const auto Seed = _Len + (Head << 3);
    return hash_len_16(Seed, Tail);
 }
 // Hash functor for uint32_t: hashes the 4 bytes of the value.
 struct UInt32Hash {
  UInt32Hash() = default;
  inline __attribute__((__always_inline__))
  std::size_t operator()(uint32_t data) const {
      const char* Bytes = reinterpret_cast<const char*>(&data);
      return hash_len_0_to_8<4>(Bytes);
  }
 };
 // Hash functor for uint64_t: hashes the 8 bytes of the value.
 struct UInt64Hash {
  UInt64Hash() = default;
  inline __attribute__((__always_inline__))
  std::size_t operator()(uint64_t data) const {
      const char* Bytes = reinterpret_cast<const char*>(&data);
      return hash_len_0_to_8<8>(Bytes);
  }
 };
 // Hash functor for __uint128_t: splits the value into a low and a high word
 // and mixes them with hash_len_16.
 struct UInt128Hash {
  UInt128Hash() = default;
  inline __attribute__((__always_inline__))
  std::size_t operator()(__uint128_t data) const {
      // Converting to std::size_t keeps exactly the bits the original mask
      // selected, so no explicit masking is needed.
      const std::size_t Low = static_cast<std::size_t>(data);
      const std::size_t High = static_cast<std::size_t>(data >> 64);
      const std::size_t Rotated = rotate_by_at_least_1(High + 16, 16);
      return hash_len_16(Low, Rotated) ^ High;
  }
 };
 // Alternative hash functor for uint32_t built from 32-bit multiply and
 // xor-shift steps with the constant 0x5bd1e995.
 struct UInt32Hash2 {
  UInt32Hash2() = default;
  inline __attribute__((__always_inline__))
  std::size_t operator()(uint32_t data) const {
      const uint32_t Mul = 0x5bd1e995;
      const uint32_t Shift = 24;
      // Scramble the input word.
      uint32_t Block = data * Mul;
      Block ^= Block >> Shift;
      Block *= Mul;
      // Fold it into the running hash, whose initial value is 4.
      uint32_t Hash = (uint32_t(4) * Mul) ^ Block;
      // Final mixing steps.
      Hash ^= Hash >> 13;
      Hash *= Mul;
      Hash ^= Hash >> 15;
      return Hash;
  }
 };
 // Second hash functor for uint64_t; it hashes the 8 bytes of the value with
 // hash_len_0_to_8<8>.
 struct UInt64Hash2 {
  UInt64Hash2() = default;
  inline __attribute__((__always_inline__))
  std::size_t operator()(uint64_t data) const {
      const char* Bytes = reinterpret_cast<const char*>(&data);
      return hash_len_0_to_8<8>(Bytes);
  }
 };
 //----------------------------------------------------------------------------//
 //                               BM_Hash
 // ---------------------------------------------------------------------------//
 template <class HashFn, class GenInputs>
 void BM_Hash(benchmark::State& st, HashFn fn, GenInputs gen) {
    auto in = gen(st.range_x());
    const auto end = in.data() + in.size();
    std::size_t last_hash = 0;
    benchmark::DoNotOptimize(&last_hash);
    while (st.KeepRunning()) {
-        c.clear();
+        for (auto it = in.data(); it != end; ++it) {
-        for (auto it = in.begin(); it != end; ++it) {
+            benchmark::DoNotOptimize(last_hash += fn(*it));
            benchmark::DoNotOptimize(c.insert(*it));
        }
-        benchmark::DoNotOptimize(c);
+        benchmark::ClobberMemory();
    }
 }
 BENCHMARK_CAPTURE(BM_SetInsert, uint32_insert,
    std::unordered_set<uint32_t>{}, getInputs<uint32_t>(1024));
-template <class Container, class Inputs>
+BENCHMARK_CAPTURE(BM_Hash,
-void BM_SetFind(benchmark::State& st, Container c, Inputs const& in) {
+    uint32_random_std_hash,
-    c.insert(in.begin(), in.end());
+    std::hash<uint32_t>{},
-    const auto end = in.end();
+    getRandomIntegerInputs<uint32_t>) -> Arg(TestNumInputs);
    while (st.KeepRunning()) {
        for (auto it = in.begin(); it != end; ++it) {
            benchmark::DoNotOptimize(c.find(*it));
        }
    }
 }
 BENCHMARK_CAPTURE(BM_SetFind, uint32_lookup,
    std::unordered_set<uint32_t>{}, getInputs<uint32_t>(1024));
 // BM_Hash with the custom UInt32Hash functor on random inputs.
 BENCHMARK_CAPTURE(BM_Hash,
    uint32_random_custom_hash,
    UInt32Hash{},
    getRandomIntegerInputs<uint32_t>) -> Arg(TestNumInputs);
 // The same comparison (std::hash vs. UInt32Hash) on inputs whose values
 // differ only in their top bits.
 BENCHMARK_CAPTURE(BM_Hash,
    uint32_top_std_hash,
    std::hash<uint32_t>{},
    getSortedTopBitsIntegerInputs<uint32_t>) -> Arg(TestNumInputs);
 BENCHMARK_CAPTURE(BM_Hash,
    uint32_top_custom_hash,
    UInt32Hash{},
    getSortedTopBitsIntegerInputs<uint32_t>) -> Arg(TestNumInputs);
 //----------------------------------------------------------------------------//
 //                       BM_InsertValue
 // ---------------------------------------------------------------------------//
 // Random and Sorted Ascending //
 BENCHMARK_CAPTURE(BM_InsertValue,
    unordered_set_uint32,
    std::unordered_set<uint32_t>{},
    getRandomIntegerInputs<uint32_t>)->Arg(TestNumInputs);
 BENCHMARK_CAPTURE(BM_InsertValue,
    unordered_set_uint32_sorted,
    std::unordered_set<uint32_t>{},
    getSortedIntegerInputs<uint32_t>)->Arg(TestNumInputs);
 // Top Bits: default hash vs. the custom UInt32Hash (with rehash) //
 BENCHMARK_CAPTURE(BM_InsertValue,
    unordered_set_top_bits_uint32,
    std::unordered_set<uint32_t>{},
    getSortedTopBitsIntegerInputs<uint32_t>)->Arg(TestNumInputs);
 BENCHMARK_CAPTURE(BM_InsertValueRehash,
    unordered_set_top_bits_uint32,
    std::unordered_set<uint32_t, UInt32Hash>{},
    getSortedTopBitsIntegerInputs<uint32_t>)->Arg(TestNumInputs);
 // String //
 BENCHMARK_CAPTURE(BM_InsertValue,
    unordered_set_string,
    std::unordered_set<std::string>{},
    getRandomStringInputs)->Arg(TestNumInputs);
 BENCHMARK_CAPTURE(BM_InsertValueRehash,
    unordered_set_string,
    std::unordered_set<std::string>{},
    getRandomStringInputs)->Arg(TestNumInputs);
 //----------------------------------------------------------------------------//
 //                         BM_Find
 // ---------------------------------------------------------------------------//
 // Each pair below registers BM_Find with one set type and BM_FindRehash with
 // another (usually a custom hash functor) on the same kind of input.
 // Random (64-bit) //
 BENCHMARK_CAPTURE(BM_Find,
    unordered_set_random_uint64,
    std::unordered_set<uint64_t>{},
    getRandomIntegerInputs<uint64_t>)->Arg(TestNumInputs);
 BENCHMARK_CAPTURE(BM_FindRehash,
    unordered_set_random_uint64,
    std::unordered_set<uint64_t, UInt64Hash>{},
    getRandomIntegerInputs<uint64_t>)->Arg(TestNumInputs);
 // Sorted (64-bit) //
 BENCHMARK_CAPTURE(BM_Find,
    unordered_set_sorted_uint64,
    std::unordered_set<uint64_t>{},
    getSortedIntegerInputs<uint64_t>)->Arg(TestNumInputs);
 BENCHMARK_CAPTURE(BM_FindRehash,
    unordered_set_sorted_uint64,
    std::unordered_set<uint64_t, UInt64Hash>{},
    getSortedIntegerInputs<uint64_t>)->Arg(TestNumInputs);
 // Top Bits (128-bit); both registrations use UInt128Hash //
 // NOTE(review): the "#if 1" guard is always true — presumably left in so the
 // 128-bit benchmarks can be switched off by hand; confirm before removing.
 #if 1
 BENCHMARK_CAPTURE(BM_Find,
    unordered_set_sorted_uint128,
    std::unordered_set<__uint128_t, UInt128Hash>{},
    getSortedTopBitsIntegerInputs<__uint128_t>)->Arg(TestNumInputs);
 BENCHMARK_CAPTURE(BM_FindRehash,
    unordered_set_sorted_uint128,
    std::unordered_set<__uint128_t, UInt128Hash>{},
    getSortedTopBitsIntegerInputs<__uint128_t>)->Arg(TestNumInputs);
 #endif
 // Sorted (32-bit); the rehash variant uses UInt32Hash2 //
 BENCHMARK_CAPTURE(BM_Find,
    unordered_set_sorted_uint32,
    std::unordered_set<uint32_t>{},
    getSortedIntegerInputs<uint32_t>)->Arg(TestNumInputs);
 BENCHMARK_CAPTURE(BM_FindRehash,
    unordered_set_sorted_uint32,
    std::unordered_set<uint32_t, UInt32Hash2>{},
    getSortedIntegerInputs<uint32_t>)->Arg(TestNumInputs);
 // Sorted Ascending, large values (64-bit) //
 BENCHMARK_CAPTURE(BM_Find,
    unordered_set_sorted_large_uint64,
    std::unordered_set<uint64_t>{},
    getSortedLargeIntegerInputs<uint64_t>)->Arg(TestNumInputs);
 BENCHMARK_CAPTURE(BM_FindRehash,
    unordered_set_sorted_large_uint64,
    std::unordered_set<uint64_t, UInt64Hash>{},
    getSortedLargeIntegerInputs<uint64_t>)->Arg(TestNumInputs);
 // Top Bits (64-bit) //
 BENCHMARK_CAPTURE(BM_Find,
    unordered_set_top_bits_uint64,
    std::unordered_set<uint64_t>{},
    getSortedTopBitsIntegerInputs<uint64_t>)->Arg(TestNumInputs);
 BENCHMARK_CAPTURE(BM_FindRehash,
    unordered_set_top_bits_uint64,
    std::unordered_set<uint64_t, UInt64Hash>{},
    getSortedTopBitsIntegerInputs<uint64_t>)->Arg(TestNumInputs);
 // String //
 BENCHMARK_CAPTURE(BM_Find,
    unordered_set_string,
    std::unordered_set<std::string>{},
    getRandomStringInputs)->Arg(TestNumInputs);
 BENCHMARK_CAPTURE(BM_FindRehash,
    unordered_set_string,
    std::unordered_set<std::string>{},
    getRandomStringInputs)->Arg(TestNumInputs);
 BENCHMARK_MAIN()
--- a/docs/BuildingLibcxx.rst
+++ b/docs/BuildingLibcxx.rst
@@ -227,7 +227,7 @@ ABI Library Specific Options
  libc++abi is the C++ ABI library used.
-libc++ Feature options
+libc++ Feature Options
 ----------------------
 .. option:: LIBCXX_ENABLE_EXCEPTIONS:BOOL
@@ -242,9 +242,25 @@ libc++ Feature options
  Build libc++ with run time type information.
 .. option:: LIBCXX_INCLUDE_BENCHMARKS:BOOL
-libc++ Feature options
+  **Default**: ``OFF``
----------------------
+
  Build the libc++ benchmark tests and the Google Benchmark library needed
  to support them.
 .. option:: LIBCXX_BUILD_BENCHMARK_NATIVE_STDLIB:BOOL
  **Default**: ``OFF``
  Build the libc++ benchmark tests and Google Benchmark library against the
  native standard library on the platform. On Linux this can be used to compare
  libc++ to libstdc++ by building the benchmark tests against both standard
  libraries.
 libc++ ABI Feature Options
 --------------------------
 The following options allow building libc++ for a different ABI version.
--- a/docs/TestingLibcxx.rst
+++ b/docs/TestingLibcxx.rst
@@ -198,3 +198,62 @@ Environment Variables
  If ``LIBCXX_COLOR_DIAGNOSTICS`` is defined then the test suite will attempt
  to use color diagnostic outputs from the compiler.
  Also see :option:`color_diagnostics`.
 Benchmarks
 ==========
 Libc++ contains benchmark tests separate from the rest of the test suite.
 The benchmarks are written using the `Google Benchmark`_ library, a copy of which
 is stored in the libc++ repository.
 For more information about using the Google Benchmark library see the
 `official documentation <https://github.com/google/benchmark>`_.
 .. _`Google Benchmark`: https://github.com/google/benchmark
 Building Benchmarks
 -------------------
 The benchmark tests are not enabled by default. To build the benchmarks
 libc++ must be configured using the CMake option ``-DLIBCXX_INCLUDE_BENCHMARKS=ON``.
 Then the benchmarks can be built using the ``libcxx-benchmarks`` target.
 An example build would look like:
 .. code-block:: bash
  $ cd build
  $ cmake [options] -DLIBCXX_INCLUDE_BENCHMARKS=ON <path to libcxx sources>
  $ make libcxx-benchmarks
 This will build all of the benchmarks under ``<libcxx-src>/benchmarks``
 against the just-built libc++. The compiled tests are output into
 ``build/benchmarks``.
 The benchmarks can also be built against the platform's native standard library
 using the ``-DLIBCXX_BUILD_BENCHMARK_NATIVE_STDLIB=ON`` CMake option. This
 is useful for comparing the performance of libc++ to other standard libraries.
 The compiled benchmarks are named ``<test>.libcxx.out`` if they test libc++ and
 ``<test>.native.out`` otherwise.
 Also See:
  * :ref:`Building Libc++ <build instructions>`
  * :ref:`CMake Options`
 Running Benchmarks
 ------------------
 The benchmarks must be run manually by the user. Currently there is no way
 to run them as part of the build.
 For example:
 .. code-block:: bash
  $ cd build/benchmarks
  $ make libcxx-benchmarks
  $ ./algorithms.libcxx.out # Runs all the benchmarks
  $ ./algorithms.libcxx.out --benchmark_filter=BM_Sort.* # Only runs the sort benchmarks
 For more information about running benchmarks see `Google Benchmark`_.
--- a/utils/google-benchmark/.gitignore
+++ b/utils/google-benchmark/.gitignore
@@ -0,0 +1,46 @@
 *.a
 *.so
 *.so.?*
 *.dll
 *.exe
 *.dylib
 *.cmake
 !/cmake/*.cmake
 *~
 *.pyc
 __pycache__
 # lcov
 *.lcov
 /lcov
 # cmake files.
 /Testing
 CMakeCache.txt
 CMakeFiles/
 cmake_install.cmake
 # makefiles.
 Makefile
 # in-source build.
 bin/
 lib/
 /test/*_test
 # exuberant ctags.
 tags
 # YouCompleteMe configuration.
 .ycm_extra_conf.pyc
 # ninja generated files.
 .ninja_deps
 .ninja_log
 build.ninja
 install_manifest.txt
 rules.ninja
 # out-of-source build top-level folders.
 build/
 _build/
--- a/utils/google-benchmark/AUTHORS
+++ b/utils/google-benchmark/AUTHORS
@@ -0,0 +1,33 @@
 # This is the official list of benchmark authors for copyright purposes.
 # This file is distinct from the CONTRIBUTORS files.
 # See the latter for an explanation.
 #
 # Names should be added to this file as:
 #	Name or Organization <email address>
 # The email address is not required for organizations.
 #
 # Please keep the list sorted.
 Albert Pretorius <pretoalb@gmail.com>
 Arne Beer <arne@twobeer.de>
 Christopher Seymour <chris.j.seymour@hotmail.com>
 David Coeurjolly <david.coeurjolly@liris.cnrs.fr>
 Dominic Hamon <dma@stripysock.com>
 Eugene Zhuk <eugene.zhuk@gmail.com>
 Evgeny Safronov <division494@gmail.com>
 Felix Homann <linuxaudio@showlabor.de>
 Google Inc.
 Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com>
 JianXiong Zhou <zhoujianxiong2@gmail.com>
 Jussi Knuuttila <jussi.knuuttila@gmail.com>
 Kaito Udagawa <umireon@gmail.com>
 Lei Xu <eddyxu@gmail.com>
 Matt Clarkson <mattyclarkson@gmail.com>
 Oleksandr Sochka <sasha.sochka@gmail.com>
 Paul Redmond <paul.redmond@gmail.com>
 Radoslav Yovchev <radoslav.tm@gmail.com>
 Shuo Chen <chenshuo@chenshuo.com>
 Yusuke Suzuki <utatane.tea@gmail.com>
 Dirac Research 
 Zbigniew Skowron <zbychs@gmail.com>
 Dominik Czarnota <dominik.b.czarnota@gmail.com>
--- a/utils/google-benchmark/CMakeLists.txt
+++ b/utils/google-benchmark/CMakeLists.txt
@@ -0,0 +1,147 @@
 cmake_minimum_required (VERSION 2.8.11)
 project (benchmark)
 # Opt in to the NEW behaviour of the policies listed below, but only when the
 # running CMake knows about them, so that older releases still configure.
 foreach(p
    CMP0054 # CMake 3.1
    CMP0056 # export EXE_LINKER_FLAGS to try_run
    )
  if(POLICY ${p})
    cmake_policy(SET ${p} NEW)
  endif()
 endforeach()
 # User-facing build options.
 option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." ON)
 option(BENCHMARK_ENABLE_LTO "Enable link time optimisation of the benchmark library." OFF)
 # Make sure we can import our CMake functions
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
 # Read the git tags to determine the project version
 include(GetGitVersion)
 get_git_version(GIT_VERSION)
 # Tell the user what versions we are using.
 # GIT_VERSION is quoted so that an empty value is still passed as an (empty)
 # argument; unquoted, it would vanish and string() would fail on its argument
 # count. VERSION is left empty when no "X.Y.Z" triple can be found.
 string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" VERSION "${GIT_VERSION}")
 message("-- Version: ${VERSION}")
 # The version of the libraries
 set(GENERIC_LIB_VERSION ${VERSION})
 # The SOVERSION is the leading (major) number of VERSION. A regex match is used
 # instead of taking the first character so that a multi-digit major version
 # ("10.2.3" -> "10") is not truncated and an empty VERSION yields an empty
 # SOVERSION rather than a configure error.
 string(REGEX MATCH "^[0-9]+" GENERIC_LIB_SOVERSION "${VERSION}")
 # Import our CMake modules
 include(CheckCXXCompilerFlag)
 include(AddCXXCompilerFlag)
 include(CXXFeatureCheck)
 # Compiler-specific flag setup: warning level, optional link-time optimisation
 # (BENCHMARK_ENABLE_LTO) and, for non-MSVC compilers, a "Coverage" build type.
 if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
  # Turn compiler warnings up to 11
  # Any existing -W1../W4 style option is stripped first so only /W4 remains.
  string(REGEX REPLACE "[-/]W[1-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
  add_definitions(-D_CRT_SECURE_NO_WARNINGS)
  # Link time optimisation
  # /GL is appended to the compile flags and /LTCG to the static, shared and
  # executable linker flags of the Release, RelWithDebInfo and MinSizeRel
  # configurations. For RelWithDebInfo, an INCREMENTAL linker option is first
  # rewritten to /INCREMENTAL:NO.
  if (BENCHMARK_ENABLE_LTO)
    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /GL")
    set(CMAKE_STATIC_LINKER_FLAGS_RELEASE "${CMAKE_STATIC_LINKER_FLAGS_RELEASE} /LTCG")
    set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /LTCG")
    set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG")
    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /GL")
    string(REGEX REPLACE "[-/]INCREMENTAL" "/INCREMENTAL:NO" CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO}")
    set(CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO} /LTCG")
    string(REGEX REPLACE "[-/]INCREMENTAL" "/INCREMENTAL:NO" CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO}")
    set(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO} /LTCG")
    string(REGEX REPLACE "[-/]INCREMENTAL" "/INCREMENTAL:NO" CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO}")
    set(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} /LTCG")
    set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /GL")
    set(CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL "${CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL} /LTCG")
    set(CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL "${CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL} /LTCG")
    set(CMAKE_EXE_LINKER_FLAGS_MINSIZEREL "${CMAKE_EXE_LINKER_FLAGS_MINSIZEREL} /LTCG")
  endif()
 else()
  # Try and enable C++11. Don't use C++14 because it doesn't work in some
  # configurations.
  add_cxx_compiler_flag(-std=c++11)
  # Fall back to the older spelling when -std=c++11 was not accepted.
  if (NOT HAVE_CXX_FLAG_STD_CXX11)
    add_cxx_compiler_flag(-std=c++0x)
  endif()
  # Turn compiler warnings up to 11
  add_cxx_compiler_flag(-Wall)
  add_cxx_compiler_flag(-Wextra)
  add_cxx_compiler_flag(-Wshadow)
  # -Werror is requested for the three optimised configurations only.
  add_cxx_compiler_flag(-Werror RELEASE)
  add_cxx_compiler_flag(-Werror RELWITHDEBINFO)
  add_cxx_compiler_flag(-Werror MINSIZEREL)
  add_cxx_compiler_flag(-pedantic)
  add_cxx_compiler_flag(-pedantic-errors)
  add_cxx_compiler_flag(-Wshorten-64-to-32)
  add_cxx_compiler_flag(-Wfloat-equal)
  add_cxx_compiler_flag(-Wzero-as-null-pointer-constant)
  add_cxx_compiler_flag(-fstrict-aliasing)
  # Only ask for the matching warning when -fstrict-aliasing was accepted.
  if (HAVE_CXX_FLAG_FSTRICT_ALIASING)
    add_cxx_compiler_flag(-Wstrict-aliasing)
  endif()
  add_cxx_compiler_flag(-Wthread-safety)
  # The results of add_cxx_compiler_flag are read in this block as
  # HAVE_CXX_FLAG_<FLAG> (see the -std=c++11 and -fstrict-aliasing checks
  # above), so the variable following that pattern is tested here. The
  # previous spelling, HAVE_WTHREAD_SAFETY, is not set anywhere in this file,
  # which left the define and the feature check below unreachable.
  # NOTE(review): confirm the result-variable name against the
  # AddCXXCompilerFlag module, and that the THREAD_SAFETY_ATTRIBUTES feature
  # check source is present in this tree.
  if (HAVE_CXX_FLAG_WTHREAD_SAFETY)
    add_definitions(-DHAVE_WTHREAD_SAFETY)
    cxx_feature_check(THREAD_SAFETY_ATTRIBUTES)
  endif()
  # Link time optimisation
  if (BENCHMARK_ENABLE_LTO)
    add_cxx_compiler_flag(-flto)
    # With GCC, prefer the gcc-ar / gcc-ranlib wrappers when they are found.
    if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
      find_program(GCC_AR gcc-ar)
      if (GCC_AR)
        set(CMAKE_AR ${GCC_AR})
      endif()
      find_program(GCC_RANLIB gcc-ranlib)
      if (GCC_RANLIB)
        set(CMAKE_RANLIB ${GCC_RANLIB})
      endif()
    endif()
  endif()
  # Coverage build type
  # The Coverage configuration starts from the Debug compiler and linker flags.
  set(CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_DEBUG}" CACHE STRING
    "Flags used by the C++ compiler during coverage builds."
    FORCE)
  set(CMAKE_EXE_LINKER_FLAGS_COVERAGE
    "${CMAKE_EXE_LINKER_FLAGS_DEBUG}" CACHE STRING
    "Flags used for linking binaries during coverage builds."
    FORCE)
  set(CMAKE_SHARED_LINKER_FLAGS_COVERAGE
    "${CMAKE_SHARED_LINKER_FLAGS_DEBUG}" CACHE STRING
    "Flags used by the shared libraries linker during coverage builds."
    FORCE)
  mark_as_advanced(
    CMAKE_CXX_FLAGS_COVERAGE
    CMAKE_EXE_LINKER_FLAGS_COVERAGE
    CMAKE_SHARED_LINKER_FLAGS_COVERAGE)
  # Re-cache CMAKE_BUILD_TYPE with its current value so that its help string
  # lists the extra Coverage option.
  set(CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}" CACHE STRING
    "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel Coverage."
    FORCE)
  add_cxx_compiler_flag(--coverage COVERAGE)
 endif()
 # Probe the C++ features the library can make use of, one check per name.
 foreach(benchmark_feature
    STD_REGEX
    GNU_POSIX_REGEX
    POSIX_REGEX
    STEADY_CLOCK
    )
  cxx_feature_check(${benchmark_feature})
 endforeach()
 # A thread library is mandatory; configuration stops if none is found.
 find_package(Threads REQUIRED)
 # Expose the project's headers to everything defined below.
 include_directories(${PROJECT_SOURCE_DIR}/include)
 # Descend into the library sources.
 add_subdirectory(src)
 # The test suite is only configured when BENCHMARK_ENABLE_TESTING is on.
 if (BENCHMARK_ENABLE_TESTING)
  enable_testing()
  add_subdirectory(test)
 endif()
--- a/utils/google-benchmark/CONTRIBUTING.md
+++ b/utils/google-benchmark/CONTRIBUTING.md
@@ -0,0 +1,58 @@
 # How to contribute #
 We'd love to accept your patches and contributions to this project.  There are
 just a few small guidelines you need to follow.
 ## Contributor License Agreement ##
 Contributions to any Google project must be accompanied by a Contributor
 License Agreement.  This is not a copyright **assignment**, it simply gives
 Google permission to use and redistribute your contributions as part of the
 project.
  * If you are an individual writing original source code and you're sure you
    own the intellectual property, then you'll need to sign an [individual
    CLA][].
  * If you work for a company that wants to allow you to contribute your work,
    then you'll need to sign a [corporate CLA][].
 You generally only need to submit a CLA once, so if you've already submitted
 one (even if it was for a different project), you probably don't need to do it
 again.
 [individual CLA]: https://developers.google.com/open-source/cla/individual
 [corporate CLA]: https://developers.google.com/open-source/cla/corporate
 Once your CLA is submitted (or if you already submitted one for
 another Google project), make a commit adding yourself to the
 [AUTHORS][] and [CONTRIBUTORS][] files. This commit can be part
 of your first [pull request][].
 [AUTHORS]: AUTHORS
 [CONTRIBUTORS]: CONTRIBUTORS
 ## Submitting a patch ##
  1. It's generally best to start by opening a new issue describing the bug or
     feature you're intending to fix.  Even if you think it's relatively minor,
     it's helpful to know what people are working on.  Mention in the initial
     issue that you are planning to work on that bug or feature so that it can
     be assigned to you.
  1. Follow the normal process of [forking][] the project, and set up a new
     branch to work in.  It's important that each group of changes be done in
     separate branches in order to ensure that a pull request only includes the
     commits related to that bug or feature.
  1. Do your best to have [well-formed commit messages][] for each change.
     This provides consistency throughout the project, and ensures that commit
     messages are able to be formatted properly by various git tools.
  1. Finally, push the commits to your fork and submit a [pull request][].
 [forking]: https://help.github.com/articles/fork-a-repo
 [well-formed commit messages]: http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html
 [pull request]: https://help.github.com/articles/creating-a-pull-request
--- a/utils/google-benchmark/CONTRIBUTORS
+++ b/utils/google-benchmark/CONTRIBUTORS
@@ -0,0 +1,52 @@
 # People who have agreed to one of the CLAs and can contribute patches.
 # The AUTHORS file lists the copyright holders; this file
 # lists people.  For example, Google employees are listed here
 # but not in AUTHORS, because Google holds the copyright.
 #
 # Names should be added to this file only after verifying that
 # the individual or the individual's organization has agreed to
 # the appropriate Contributor License Agreement, found here:
 #
 # https://developers.google.com/open-source/cla/individual
 # https://developers.google.com/open-source/cla/corporate
 #
 # The agreement for individuals can be filled out on the web.
 #
 # When adding J Random Contributor's name to this file,
 # either J's name or J's organization's name should be
 # added to the AUTHORS file, depending on whether the
 # individual or corporate CLA was used.
 #
 # Names should be added to this file as:
 #     Name <email address>
 #
 # Please keep the list sorted.
 Albert Pretorius <pretoalb@gmail.com>
 Arne Beer <arne@twobeer.de>
 Billy Robert O'Neal III <billy.oneal@gmail.com> <bion@microsoft.com>
 Chris Kennelly <ckennelly@google.com> <ckennelly@ckennelly.com>
 Christopher Seymour <chris.j.seymour@hotmail.com>
 David Coeurjolly <david.coeurjolly@liris.cnrs.fr>
 Dominic Hamon <dma@stripysock.com>
 Eric Fiselier <eric@efcs.ca>
 Eugene Zhuk <eugene.zhuk@gmail.com>
 Evgeny Safronov <division494@gmail.com>
 Felix Homann <linuxaudio@showlabor.de>
 Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com>
 JianXiong Zhou <zhoujianxiong2@gmail.com>
 Jussi Knuuttila <jussi.knuuttila@gmail.com>
 Kaito Udagawa <umireon@gmail.com>
 Kai Wolf <kai.wolf@gmail.com>
 Lei Xu <eddyxu@gmail.com>
 Matt Clarkson <mattyclarkson@gmail.com>
 Oleksandr Sochka <sasha.sochka@gmail.com>
 Pascal Leroy <phl@google.com>
 Paul Redmond <paul.redmond@gmail.com>
 Pierre Phaneuf <pphaneuf@google.com>
 Radoslav Yovchev <radoslav.tm@gmail.com>
 Shuo Chen <chenshuo@chenshuo.com>
 Yusuke Suzuki <utatane.tea@gmail.com>
 Tobias Ulvgård <tobias.ulvgard@dirac.se>
 Zbigniew Skowron <zbychs@gmail.com>
 Dominik Czarnota <dominik.b.czarnota@gmail.com>
--- a/utils/google-benchmark/LICENSE
+++ b/utils/google-benchmark/LICENSE
@@ -0,0 +1,202 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/
   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
   1. Definitions.
      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.
      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.
      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.
      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.
      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.
      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.
      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).
      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.
      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."
      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.
   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.
   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.
   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:
      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and
      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and
      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and
      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.
      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.
   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.
   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.
   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.
   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.
   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.
   END OF TERMS AND CONDITIONS
   APPENDIX: How to apply the Apache License to your work.
      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.
   Copyright [yyyy] [name of copyright owner]
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
       http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
--- a/utils/google-benchmark/README.LLVM
+++ b/utils/google-benchmark/README.LLVM
@@ -0,0 +1,6 @@
 LLVM notes
 ----------
 This directory contains the Google Benchmark source code with some unnecessary
 files removed. Note that this directory is under a different license than
 libc++.
--- a/utils/google-benchmark/README.md
+++ b/utils/google-benchmark/README.md
@@ -0,0 +1,510 @@
 # benchmark
 [![Build Status](https://travis-ci.org/google/benchmark.svg?branch=master)](https://travis-ci.org/google/benchmark)
 [![Build status](https://ci.appveyor.com/api/projects/status/u0qsyp7t1tk7cpxs/branch/master?svg=true)](https://ci.appveyor.com/project/google/benchmark/branch/master)
 [![Coverage Status](https://coveralls.io/repos/google/benchmark/badge.svg)](https://coveralls.io/r/google/benchmark)
 A library to support the benchmarking of functions, similar to unit-tests.
 Discussion group: https://groups.google.com/d/forum/benchmark-discuss
 IRC channel: https://freenode.net #googlebenchmark
 ## Example usage
 ### Basic usage
 Define a function that executes the code to be measured.
 ```c++
 static void BM_StringCreation(benchmark::State& state) {
  while (state.KeepRunning())
    std::string empty_string;
 }
 // Register the function as a benchmark
 BENCHMARK(BM_StringCreation);
 // Define another benchmark
 static void BM_StringCopy(benchmark::State& state) {
  std::string x = "hello";
  while (state.KeepRunning())
    std::string copy(x);
 }
 BENCHMARK(BM_StringCopy);
 BENCHMARK_MAIN();
 ```
 ### Passing arguments
 Sometimes a family of benchmarks can be implemented with just one routine that
 takes an extra argument to specify which one of the family of benchmarks to
 run. For example, the following code defines a family of benchmarks for
 measuring the speed of `memcpy()` calls of different lengths:
 ```c++
 static void BM_memcpy(benchmark::State& state) {
  char* src = new char[state.range_x()];
  char* dst = new char[state.range_x()];
  memset(src, 'x', state.range_x());
  while (state.KeepRunning())
    memcpy(dst, src, state.range_x());
  state.SetBytesProcessed(int64_t(state.iterations()) *
                          int64_t(state.range_x()));
  delete[] src;
  delete[] dst;
 }
 BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
 ```
 The preceding code is quite repetitive, and can be replaced with the following
 short-hand. The following invocation will pick a few appropriate arguments in
 the specified range and will generate a benchmark for each such argument.
 ```c++
 BENCHMARK(BM_memcpy)->Range(8, 8<<10);
 ```
 By default the arguments in the range are generated in multiples of eight and
 the command above selects [ 8, 64, 512, 4k, 8k ]. In the following code the
 range multiplier is changed to multiples of two.
 ```c++
 BENCHMARK(BM_memcpy)->RangeMultiplier(2)->Range(8, 8<<10);
 ```
 Now arguments generated are [ 8, 16, 32, 64, 128, 256, 512, 1024, 2k, 4k, 8k ].
 You might have a benchmark that depends on two inputs. For example, the
 following code defines a family of benchmarks for measuring the speed of set
 insertion.
 ```c++
 static void BM_SetInsert(benchmark::State& state) {
  while (state.KeepRunning()) {
    state.PauseTiming();
    std::set<int> data = ConstructRandomSet(state.range_x());
    state.ResumeTiming();
    for (int j = 0; j < state.range_y(); ++j)
      data.insert(RandomNumber());
  }
 }
 BENCHMARK(BM_SetInsert)
    ->ArgPair(1<<10, 1)
    ->ArgPair(1<<10, 8)
    ->ArgPair(1<<10, 64)
    ->ArgPair(1<<10, 512)
    ->ArgPair(8<<10, 1)
    ->ArgPair(8<<10, 8)
    ->ArgPair(8<<10, 64)
    ->ArgPair(8<<10, 512);
 ```
 The preceding code is quite repetitive, and can be replaced with the following
 short-hand. The following macro will pick a few appropriate arguments in the
 product of the two specified ranges and will generate a benchmark for each such
 pair.
 ```c++
 BENCHMARK(BM_SetInsert)->RangePair(1<<10, 8<<10, 1, 512);
 ```
 For more complex patterns of inputs, passing a custom function to `Apply` allows
 programmatic specification of an arbitrary set of arguments on which to run the
 benchmark. The following example enumerates a dense range on one parameter,
 and a sparse range on the second.
 ```c++
 static void CustomArguments(benchmark::internal::Benchmark* b) {
  for (int i = 0; i <= 10; ++i)
    for (int j = 32; j <= 1024*1024; j *= 8)
      b->ArgPair(i, j);
 }
 BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
 ```
 ### Calculate asymptotic complexity (Big O)
 Asymptotic complexity might be calculated for a family of benchmarks. The
 following code will calculate the coefficient for the high-order term in the
 running time and the normalized root-mean square error of string comparison.
 ```c++
 static void BM_StringCompare(benchmark::State& state) {
  std::string s1(state.range_x(), '-');
  std::string s2(state.range_x(), '-');
  while (state.KeepRunning()) {
    benchmark::DoNotOptimize(s1.compare(s2));
  }
  state.SetComplexityN(state.range_x());
 }
 BENCHMARK(BM_StringCompare)
    ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(benchmark::oN);
 ```
 As shown in the following invocation, asymptotic complexity might also be
 calculated automatically.
 ```c++
 BENCHMARK(BM_StringCompare)
    ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity();
 ```
 The following code will specify asymptotic complexity with a lambda function,
 that might be used to customize high-order term calculation.
 ```c++
 BENCHMARK(BM_StringCompare)->RangeMultiplier(2)
    ->Range(1<<10, 1<<18)->Complexity([](int n)->double{return n; });
 ```
 ### Templated benchmarks
 Templated benchmarks work the same way: This example produces and consumes
 messages of size `sizeof(v)` `range_x` times. It also outputs throughput in the
 absence of multiprogramming.
 ```c++
 template <class Q> void BM_Sequential(benchmark::State& state) {
  Q q;
  typename Q::value_type v;
  while (state.KeepRunning()) {
    for (int i = state.range_x(); i--; )
      q.push(v);
    for (int e = state.range_x(); e--; )
      q.Wait(&v);
  }
  // actually messages, not bytes:
  state.SetBytesProcessed(
      static_cast<int64_t>(state.iterations())*state.range_x());
 }
 BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
 ```
 Three macros are provided for adding benchmark templates.
 ```c++
 #if __cplusplus >= 201103L // C++11 and greater.
 #define BENCHMARK_TEMPLATE(func, ...) // Takes any number of parameters.
 #else // C++ < C++11
 #define BENCHMARK_TEMPLATE(func, arg1)
 #endif
 #define BENCHMARK_TEMPLATE1(func, arg1)
 #define BENCHMARK_TEMPLATE2(func, arg1, arg2)
 ```
 ## Passing arbitrary arguments to a benchmark
 In C++11 it is possible to define a benchmark that takes an arbitrary number
 of extra arguments. The `BENCHMARK_CAPTURE(func, test_case_name, ...args)`
 macro creates a benchmark that invokes `func`  with the `benchmark::State` as
 the first argument followed by the specified `args...`.
 The `test_case_name` is appended to the name of the benchmark and
 should describe the values passed.
 ```c++
 template <class ...ExtraArgs>
 void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
  [...]
 }
 // Registers a benchmark named "BM_takes_args/int_string_test" that passes
 // the specified values to `extra_args`.
 BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
 ```
 Note that elements of `...args` may refer to global variables. Users should
 avoid modifying global state inside of a benchmark.
 ### Multithreaded benchmarks
 In a multithreaded test (benchmark invoked by multiple threads simultaneously),
 it is guaranteed that none of the threads will start until all have called
 `KeepRunning`, and all will have finished before KeepRunning returns false. As
 such, any global setup or teardown can be wrapped in a check against the thread
 index:
 ```c++
 static void BM_MultiThreaded(benchmark::State& state) {
  if (state.thread_index == 0) {
    // Setup code here.
  }
  while (state.KeepRunning()) {
    // Run the test as normal.
  }
  if (state.thread_index == 0) {
    // Teardown code here.
  }
 }
 BENCHMARK(BM_MultiThreaded)->Threads(2);
 ```
 If the benchmarked code itself uses threads and you want to compare it to
 single-threaded code, you may want to use real-time ("wallclock") measurements
 for latency comparisons:
 ```c++
 BENCHMARK(BM_test)->Range(8, 8<<10)->UseRealTime();
 ```
 Without `UseRealTime`, CPU time is used by default.
 ## Manual timing
 For benchmarking something for which neither CPU time nor real-time are
 correct or accurate enough, completely manual timing is supported using
 the `UseManualTime` function. 
 When `UseManualTime` is used, the benchmarked code must call
 `SetIterationTime` once per iteration of the `KeepRunning` loop to
 report the manually measured time.
 An example use case for this is benchmarking GPU execution (e.g. OpenCL
 or CUDA kernels, OpenGL or Vulkan or Direct3D draw calls), which cannot
 be accurately measured using CPU time or real-time. Instead, they can be
 measured accurately using a dedicated API, and these measurement results
 can be reported back with `SetIterationTime`.
 ```c++
 static void BM_ManualTiming(benchmark::State& state) {
  int microseconds = state.range_x();
  std::chrono::duration<double, std::micro> sleep_duration {
    static_cast<double>(microseconds)
  };
  while (state.KeepRunning()) {
    auto start = std::chrono::high_resolution_clock::now();
    // Simulate some useful workload with a sleep
    std::this_thread::sleep_for(sleep_duration);
    auto end   = std::chrono::high_resolution_clock::now();
    auto elapsed_seconds =
      std::chrono::duration_cast<std::chrono::duration<double>>(
        end - start);
    state.SetIterationTime(elapsed_seconds.count());
  }
 }
 BENCHMARK(BM_ManualTiming)->Range(1, 1<<17)->UseManualTime();
 ```
 ### Preventing optimisation
 To prevent a value or expression from being optimized away by the compiler
 the `benchmark::DoNotOptimize(...)` and `benchmark::ClobberMemory()`
 functions can be used.
 ```c++
 static void BM_test(benchmark::State& state) {
  while (state.KeepRunning()) {
      int x = 0;
      for (int i=0; i < 64; ++i) {
        benchmark::DoNotOptimize(x += i);
      }
  }
 }
 ```
 `DoNotOptimize(<expr>)` forces the *result* of `<expr>` to be stored in either
 memory or a register. For GNU based compilers it acts as a read/write barrier
 for global memory. More specifically it forces the compiler to flush pending
 writes to memory and reload any other values as necessary.
 Note that `DoNotOptimize(<expr>)` does not prevent optimizations on `<expr>`
 in any way. `<expr>` may even be removed entirely when the result is already
 known. For example:
 ```c++
  /* Example 1: `<expr>` is removed entirely. */
  int foo(int x) { return x + 42; }
  while (...) DoNotOptimize(foo(0)); // Optimized to DoNotOptimize(42);
  /*  Example 2: Result of '<expr>' is only reused */
  int bar(int) __attribute__((const));
  while (...) DoNotOptimize(bar(0)); // Optimized to:
  // int __result__ = bar(0);
  // while (...) DoNotOptimize(__result__);
 ```
 The second tool for preventing optimizations is `ClobberMemory()`. In essence
 `ClobberMemory()` forces the compiler to perform all pending writes to global
 memory. Memory managed by block scope objects must be "escaped" using
 `DoNotOptimize(...)` before it can be clobbered. In the below example
 `ClobberMemory()` prevents the call to `v.push_back(42)` from being optimized
 away.
 ```c++
 static void BM_vector_push_back(benchmark::State& state) {
  while (state.KeepRunning()) {
    std::vector<int> v;
    v.reserve(1);
    benchmark::DoNotOptimize(v.data()); // Allow v.data() to be clobbered.
    v.push_back(42);
    benchmark::ClobberMemory(); // Force 42 to be written to memory.
  }
 }
 ```
 Note that `ClobberMemory()` is only available for GNU based compilers.
 ### Set time unit manually
 If a benchmark runs a few milliseconds it may be hard to visually compare the
 measured times, since the output data is given in nanoseconds by default. To
 set the time unit explicitly, specify it on the registered benchmark:
 ```c++
 BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
 ```
 ## Controlling number of iterations
 In all cases, the number of iterations for which the benchmark is run is
 governed by the amount of time the benchmark takes. Concretely, the benchmark
 runs at least one and at most 1e9 iterations, and stops once the CPU time
 exceeds the minimum time or the wallclock time reaches 5x the minimum time.
 The minimum time is set with the `--benchmark_min_time` flag, or per benchmark
 by calling `MinTime` on the registered benchmark object.
 ## Reporting the mean and standard deviation by repeated benchmarks
 By default each benchmark is run once and that single result is reported.
 However benchmarks are often noisy and a single result may not be representative
 of the overall behavior. For this reason it's possible to repeatedly rerun the
 benchmark.
 The number of runs of each benchmark is specified globally by the
 `--benchmark_repetitions` flag or on a per benchmark basis by calling
 `Repetitions` on the registered benchmark object. When a benchmark is run
 more than once the mean and standard deviation of the runs will be reported.
 ## Fixtures
 Fixture tests are created by
 first defining a type that derives from ::benchmark::Fixture and then
 creating/registering the tests using the following macros:
 * `BENCHMARK_F(ClassName, Method)`
 * `BENCHMARK_DEFINE_F(ClassName, Method)`
 * `BENCHMARK_REGISTER_F(ClassName, Method)`
 For Example:
 ```c++
 class MyFixture : public benchmark::Fixture {};
 BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) {
   while (st.KeepRunning()) {
     ...
  }
 }
 BENCHMARK_DEFINE_F(MyFixture, BarTest)(benchmark::State& st) {
   while (st.KeepRunning()) {
     ...
  }
 }
 /* BarTest is NOT registered */
 BENCHMARK_REGISTER_F(MyFixture, BarTest)->Threads(2);
 /* BarTest is now registered */
 ```
 ## Exiting Benchmarks in Error
 When errors caused by external influences, such as file I/O and network
 communication, occur within a benchmark the
 `State::SkipWithError(const char* msg)` function can be used to skip that run
 of the benchmark and report the error. Note that only future iterations of the
 `KeepRunning()` loop are skipped. Users may explicitly return to exit the
 benchmark immediately.
 The `SkipWithError(...)` function may be used at any point within the benchmark,
 including before and after the `KeepRunning()` loop.
 For example:
 ```c++
 static void BM_test(benchmark::State& state) {
  auto resource = GetResource();
  if (!resource.good()) {
      state.SkipWithError("Resource is not good!");
      // KeepRunning() loop will not be entered.
  }
  while (state.KeepRunning()) {
      auto data = resource.read_data();
      if (!resource.good()) {
        state.SkipWithError("Failed to read data!");
        break; // Needed to skip the rest of the iteration.
     }
     do_stuff(data);
  }
 }
 ```
 ## Output Formats
 The library supports multiple output formats. Use the
 `--benchmark_format=<tabular|json|csv>` flag to set the format type. `tabular` is
 the default format.
 The Tabular format is intended to be a human readable format. By default
 the format generates color output. Context is output on stderr and the 
 tabular data on stdout. Example tabular output looks like:
 ```
 Benchmark                               Time(ns)    CPU(ns) Iterations
 ----------------------------------------------------------------------
 BM_SetInsert/1024/1                        28928      29349      23853  133.097kB/s   33.2742k items/s
 BM_SetInsert/1024/8                        32065      32913      21375  949.487kB/s   237.372k items/s
 BM_SetInsert/1024/10                       33157      33648      21431  1.13369MB/s   290.225k items/s
 ```
 The JSON format outputs human readable json split into two top level attributes.
 The `context` attribute contains information about the run in general, including
 information about the CPU and the date.
 The `benchmarks` attribute contains a list of every benchmark run. Example json
 output looks like:
 ``` json
 {
  "context": {
    "date": "2015/03/17-18:40:25",
    "num_cpus": 40,
    "mhz_per_cpu": 2801,
    "cpu_scaling_enabled": false,
    "build_type": "debug"
  },
  "benchmarks": [
    {
      "name": "BM_SetInsert/1024/1",
      "iterations": 94877,
      "real_time": 29275,
      "cpu_time": 29836,
      "bytes_per_second": 134066,
      "items_per_second": 33516
    },
    {
      "name": "BM_SetInsert/1024/8",
      "iterations": 21609,
      "real_time": 32317,
      "cpu_time": 32429,
      "bytes_per_second": 986770,
      "items_per_second": 246693
    },
    {
      "name": "BM_SetInsert/1024/10",
      "iterations": 21393,
      "real_time": 32724,
      "cpu_time": 33355,
      "bytes_per_second": 1199226,
      "items_per_second": 299807
    }
  ]
 }
 ```
 The CSV format outputs comma-separated values. The `context` is output on stderr
 and the CSV itself on stdout. Example CSV output looks like:
 ```
 name,iterations,real_time,cpu_time,bytes_per_second,items_per_second,label
 "BM_SetInsert/1024/1",65465,17890.7,8407.45,475768,118942,
 "BM_SetInsert/1024/8",116606,18810.1,9766.64,3.27646e+06,819115,
 "BM_SetInsert/1024/10",106365,17238.4,8421.53,4.74973e+06,1.18743e+06,
 ```
 ## Debug vs Release
 By default, benchmark builds as a debug library. You will see a warning in the output when this is the case. To build it as a release library instead, use:
 ```
 cmake -DCMAKE_BUILD_TYPE=Release
 ```
 To enable link-time optimisation, use
 ```
 cmake -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_ENABLE_LTO=true
 ```
 ## Linking against the library
 When using gcc, it is necessary to link against pthread to avoid runtime exceptions. This is due to how gcc implements std::thread. See [issue #67](https://github.com/google/benchmark/issues/67) for more details.
--- a/utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake
+++ b/utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake
@@ -0,0 +1,37 @@
 # - Adds a compiler flag if it is supported by the compiler
 #
 # This function checks that the supplied compiler flag is supported and then
 # adds it to the corresponding compiler flags
 #
 #  add_cxx_compiler_flag(<FLAG> [<VARIANT>])
 #
 # - Example
 #
 # include(AddCXXCompilerFlag)
 # add_cxx_compiler_flag(-Wall)
 # add_cxx_compiler_flag(-fno-strict-aliasing RELEASE)
 # Requires CMake 2.6+
 if(__add_cxx_compiler_flag)
  return()
 endif()
 set(__add_cxx_compiler_flag INCLUDED)
 include(CheckCXXCompilerFlag)
 # Probe whether the C++ compiler accepts FLAG and, if it does, append FLAG to
 # CMAKE_CXX_FLAGS (or to CMAKE_CXX_FLAGS_<VARIANT> when a second argument is
 # given) in the caller's scope.
 #
 #   FLAG     - compiler flag to probe, e.g. -Wall
 #   VARIANT  - optional second positional argument, e.g. RELEASE; it is
 #              upper-cased and used as the suffix of the flags variable
 #
 # The probe result is reported by check_cxx_compiler_flag() in the variable
 # HAVE_CXX_FLAG_<sanitized flag>.
 function(add_cxx_compiler_flag FLAG)
  # Derive a valid variable name from the flag: upper-case it, map '+' to 'X',
  # turn every other non-identifier character into '_' and collapse '_' runs.
  string(TOUPPER "HAVE_CXX_FLAG_${FLAG}" SANITIZED_FLAG)
  string(REPLACE "+" "X" SANITIZED_FLAG "${SANITIZED_FLAG}")
  string(REGEX REPLACE "[^A-Za-z_0-9]" "_" SANITIZED_FLAG "${SANITIZED_FLAG}")
  string(REGEX REPLACE "_+" "_" SANITIZED_FLAG "${SANITIZED_FLAG}")
  # The flag under test is supplied through CMAKE_REQUIRED_FLAGS; the flag
  # argument of the check itself is left empty.
  set(CMAKE_REQUIRED_FLAGS "${FLAG}")
  check_cxx_compiler_flag("" ${SANITIZED_FLAG})
  if(${SANITIZED_FLAG})
    # ARGV1 is only well-defined when a second argument was really passed.
    # Without the ARGC check, a one-argument call made from inside another
    # function could observe that function's ARGV1 instead.
    set(VARIANT "")
    if(ARGC GREATER 1)
      set(VARIANT "${ARGV1}")
      if(VARIANT)
        string(TOUPPER "_${VARIANT}" VARIANT)
      endif()
    endif()
    set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE)
  endif()
 endfunction()
--- a/utils/google-benchmark/cmake/CXXFeatureCheck.cmake
+++ b/utils/google-benchmark/cmake/CXXFeatureCheck.cmake
@@ -0,0 +1,42 @@
 # - Compile and run code to check for C++ features
 #
 # This function compiles and runs a source file under the `cmake` folder
 # and adds the corresponding `HAVE_[FILENAME]` flag to the CMake
 # environment
 #
 #  cxx_feature_check(<FILE>)
 #
 # - Example
 #
 # include(CXXFeatureCheck)
 # cxx_feature_check(STD_REGEX)
 # Requires CMake 2.6+
 if(__cxx_feature_check)
  return()
 endif()
 set(__cxx_feature_check INCLUDED)
 # Compile and run cmake/<file>.cpp (looked up under CMAKE_CURRENT_SOURCE_DIR,
 # with FILE lower-cased) and record whether the feature is usable.
 #
 #   FILE - base name of the probe source, e.g. STD_REGEX for cmake/std_regex.cpp
 #
 # When the probe compiles and exits with status 0, HAVE_<FILE> (upper-cased) is
 # set to 1 in the caller's scope and -DHAVE_<FILE> is added via
 # add_definitions(). If HAVE_<FILE> is already defined the function returns
 # immediately without doing anything.
 function(cxx_feature_check FILE)
  string(TOLOWER ${FILE} FILE)
  string(TOUPPER ${FILE} VAR)
  string(TOUPPER "HAVE_${VAR}" FEATURE)
  if (DEFINED HAVE_${VAR})
    return()
  endif()
  message("-- Performing Test ${FEATURE}")
  try_run(RUN_${FEATURE} COMPILE_${FEATURE}
          ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp)
  if(RUN_${FEATURE} EQUAL 0)
    message("-- Performing Test ${FEATURE} -- success")
    # set() has separate CACHE and PARENT_SCOPE signatures. Combining them made
    # the trailing PARENT_SCOPE win and stored the words "CACHE;INTERNAL;..." as
    # part of the value, so only the plain parent-scope form is used here.
    set(HAVE_${VAR} 1 PARENT_SCOPE)
    add_definitions(-DHAVE_${VAR})
  else()
    if(NOT COMPILE_${FEATURE})
      message("-- Performing Test ${FEATURE} -- failed to compile")
    else()
      message("-- Performing Test ${FEATURE} -- compiled but failed to run")
    endif()
  endif()
 endfunction()
--- a/utils/google-benchmark/cmake/GetGitVersion.cmake
+++ b/utils/google-benchmark/cmake/GetGitVersion.cmake
@@ -0,0 +1,51 @@
 # - Returns a version string from Git tags
 #
 # This function inspects the annotated git tags for the project and returns a string
 # into a CMake variable
 #
 #  get_git_version(<var>)
 #
 # - Example
 #
 # include(GetGitVersion)
 # get_git_version(GIT_VERSION)
 #
 # Requires CMake 2.8.11+
 find_package(Git)
 if(__get_git_version)
  return()
 endif()
 set(__get_git_version INCLUDED)
 # Compute a version string from `git describe` and store it in <var> in the
 # caller's scope.
 #
 #   var - name of the variable that receives the version string
 #
 # The result is "v0.0.0" when git is unavailable or `git describe` fails.
 # Otherwise it is the describe output with the "-<count>-g" part collapsed to
 # "-", and with "-dirty" appended when `git diff-index` reports changed files.
 #
 # NOTE(review): no WORKING_DIRECTORY is passed to execute_process(), so git
 # runs in whatever directory CMake is executing in - confirm that this is
 # inside the source checkout for out-of-tree builds.
 function(get_git_version var)
  if(GIT_EXECUTABLE)
      execute_process(COMMAND ${GIT_EXECUTABLE} describe --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8
          RESULT_VARIABLE status
          OUTPUT_VARIABLE GIT_VERSION
          ERROR_QUIET)
      # Test the variable itself rather than its expansion: RESULT_VARIABLE may
      # hold an error string instead of an exit code, and expanding that into
      # if() would be read as an undefined variable name, i.e. as success.
      if(status)
          set(GIT_VERSION "v0.0.0")
      else()
          # Quoted so that empty output does not drop an argument.
          string(STRIP "${GIT_VERSION}" GIT_VERSION)
          string(REGEX REPLACE "-[0-9]+-g" "-" GIT_VERSION "${GIT_VERSION}")
      endif()
      # Work out if the repository is dirty
      execute_process(COMMAND ${GIT_EXECUTABLE} update-index -q --refresh
          OUTPUT_QUIET
          ERROR_QUIET)
      execute_process(COMMAND ${GIT_EXECUTABLE} diff-index --name-only HEAD --
          OUTPUT_VARIABLE GIT_DIFF_INDEX
          ERROR_QUIET)
      string(COMPARE NOTEQUAL "${GIT_DIFF_INDEX}" "" GIT_DIRTY)
      if(GIT_DIRTY)
          set(GIT_VERSION "${GIT_VERSION}-dirty")
      endif()
  else()
      set(GIT_VERSION "v0.0.0")
  endif()
  message("-- git Version: ${GIT_VERSION}")
  set(${var} "${GIT_VERSION}" PARENT_SCOPE)
 endfunction()
--- a/utils/google-benchmark/cmake/gnu_posix_regex.cpp
+++ b/utils/google-benchmark/cmake/gnu_posix_regex.cpp
@@ -0,0 +1,12 @@
 #include <gnuregex.h>
 #include <string>
 int main() {
  std::string str = "test0159";
  regex_t re;
  int ec = regcomp(&re, "^[a-z]+[0-9]+$", REG_EXTENDED | REG_NOSUB);
  if (ec != 0) {
    return ec;
  }
  return regexec(&re, str.c_str(), 0, nullptr, 0) ? -1 : 0;
 }
--- a/utils/google-benchmark/cmake/posix_regex.cpp
+++ b/utils/google-benchmark/cmake/posix_regex.cpp
@@ -0,0 +1,14 @@
 #include <regex.h>
 #include <string>
 // Feature probe for POSIX <regex.h>: compile an extended regular expression
 // and match it against a fixed string. Exits with 0 on a match, -1 on a
 // mismatch, or the regcomp() error code if the pattern fails to compile.
 int main() {
  std::string str = "test0159";
  regex_t re;
  int ec = regcomp(&re, "^[a-z]+[0-9]+$", REG_EXTENDED | REG_NOSUB);
  if (ec != 0) {
    return ec;
  }
  // regexec() returns 0 on a match, so a non-zero result maps to failure.
  int ret = regexec(&re, str.c_str(), 0, nullptr, 0) ? -1 : 0;
  // Release the compiled pattern before exiting.
  regfree(&re);
  return ret;
 }
--- a/utils/google-benchmark/cmake/std_regex.cpp
+++ b/utils/google-benchmark/cmake/std_regex.cpp
@@ -0,0 +1,10 @@
 #include <regex>
 #include <string>
 // Feature probe for <regex>: build a std::regex using the extended grammar
 // and search a fixed string with it. Exits with 0 when the pattern matches
 // and -1 otherwise.
 int main() {
  const std::string str = "test0159";
  // Default-construct first, then assign, so both operations are exercised.
  std::regex re;
  re = std::regex("^[a-z]+[0-9]+$",
       std::regex_constants::extended | std::regex_constants::nosubs);
  return std::regex_search(str, re) ? 0 : -1;
 }
--- a/utils/google-benchmark/cmake/steady_clock.cpp
+++ b/utils/google-benchmark/cmake/steady_clock.cpp
@@ -0,0 +1,7 @@
 #include <chrono>
 // Feature probe for std::chrono::steady_clock: the program only needs to
 // compile and run a single call to steady_clock::now().
 int main() {
    typedef std::chrono::steady_clock Clock;
    Clock::time_point tp = Clock::now();
    // Cast to void so the otherwise unused variable does not trigger a warning.
    ((void)tp);
 }
--- a/utils/google-benchmark/cmake/thread_safety_attributes.cpp
+++ b/utils/google-benchmark/cmake/thread_safety_attributes.cpp
@@ -0,0 +1,4 @@
 // Feature probe: include ../src/mutex.h with HAVE_THREAD_SAFETY_ATTRIBUTES
 // defined. The program does nothing at run time; the check is whether the
 // header compiles with that macro set.
 #define HAVE_THREAD_SAFETY_ATTRIBUTES
 #include "../src/mutex.h"
 int main() {}
--- a/utils/google-benchmark/include/benchmark/benchmark.h
+++ b/utils/google-benchmark/include/benchmark/benchmark.h
@@ -0,0 +1,21 @@
 // Copyright 2015 Google Inc. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifndef BENCHMARK_BENCHMARK_H_
 #define BENCHMARK_BENCHMARK_H_
 #include "macros.h"
 #include "benchmark_api.h"
 #include "reporter.h"
 #endif // BENCHMARK_BENCHMARK_H_
--- a/utils/google-benchmark/include/benchmark/benchmark_api.h
+++ b/utils/google-benchmark/include/benchmark/benchmark_api.h
@@ -0,0 +1,747 @@
 // Support for registering benchmarks for functions.
 /* Example usage:
 // Define a function that executes the code to be measured a
 // specified number of times:
 static void BM_StringCreation(benchmark::State& state) {
  while (state.KeepRunning())
    std::string empty_string;
 }
 // Register the function as a benchmark
 BENCHMARK(BM_StringCreation);
 // Define another benchmark
 static void BM_StringCopy(benchmark::State& state) {
  std::string x = "hello";
  while (state.KeepRunning())
    std::string copy(x);
 }
 BENCHMARK(BM_StringCopy);
 // Augment the main() program to invoke benchmarks if specified
 // via the --benchmark_filter command line flag.  E.g.,
 //       my_unittest --benchmark_filter=all
 //       my_unittest --benchmark_filter=BM_StringCreation
 //       my_unittest --benchmark_filter=String
 //       my_unittest --benchmark_filter='Copy|Creation'
 int main(int argc, char** argv) {
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
  return 0;
 }
 // Sometimes a family of microbenchmarks can be implemented with
 // just one routine that takes an extra argument to specify which
 // one of the family of benchmarks to run.  For example, the following
 // code defines a family of microbenchmarks for measuring the speed
 // of memcpy() calls of different lengths:
 static void BM_memcpy(benchmark::State& state) {
  char* src = new char[state.range_x()]; char* dst = new char[state.range_x()];
  memset(src, 'x', state.range_x());
  while (state.KeepRunning())
    memcpy(dst, src, state.range_x());
  state.SetBytesProcessed(int64_t(state.iterations()) *
                          int64_t(state.range_x()));
  delete[] src; delete[] dst;
 }
 BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
 // The preceding code is quite repetitive, and can be replaced with the
 // following short-hand.  The following invocation will pick a few
 // appropriate arguments in the specified range and will generate a
 // microbenchmark for each such argument.
 BENCHMARK(BM_memcpy)->Range(8, 8<<10);
 // You might have a microbenchmark that depends on two inputs.  For
 // example, the following code defines a family of microbenchmarks for
 // measuring the speed of set insertion.
 static void BM_SetInsert(benchmark::State& state) {
  while (state.KeepRunning()) {
    state.PauseTiming();
    set<int> data = ConstructRandomSet(state.range_x());
    state.ResumeTiming();
    for (int j = 0; j < state.range_y(); ++j)
      data.insert(RandomNumber());
  }
 }
 BENCHMARK(BM_SetInsert)
   ->ArgPair(1<<10, 1)
   ->ArgPair(1<<10, 8)
   ->ArgPair(1<<10, 64)
   ->ArgPair(1<<10, 512)
   ->ArgPair(8<<10, 1)
   ->ArgPair(8<<10, 8)
   ->ArgPair(8<<10, 64)
   ->ArgPair(8<<10, 512);
 // The preceding code is quite repetitive, and can be replaced with
 // the following short-hand.  The following macro will pick a few
 // appropriate arguments in the product of the two specified ranges
 // and will generate a microbenchmark for each such pair.
 BENCHMARK(BM_SetInsert)->RangePair(1<<10, 8<<10, 1, 512);
 // For more complex patterns of inputs, passing a custom function
 // to Apply allows programmatic specification of an
 // arbitrary set of arguments to run the microbenchmark on.
 // The following example enumerates a dense range on
 // one parameter, and a sparse range on the second.
 static void CustomArguments(benchmark::internal::Benchmark* b) {
  for (int i = 0; i <= 10; ++i)
    for (int j = 32; j <= 1024*1024; j *= 8)
      b->ArgPair(i, j);
 }
 BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
 // Templated microbenchmarks work the same way:
 // Produce then consume 'size' messages 'iters' times
 // Measures throughput in the absence of multiprogramming.
 template <class Q> void BM_Sequential(benchmark::State& state) {
  Q q;
  typename Q::value_type v;
  while (state.KeepRunning()) {
    for (int i = state.range_x(); i--; )
      q.push(v);
    for (int e = state.range_x(); e--; )
      q.Wait(&v);
  }
  // actually messages, not bytes:
  state.SetBytesProcessed(
      static_cast<int64_t>(state.iterations())*state.range_x());
 }
 BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
 Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
 benchmark. This option overrides the `benchmark_min_time` flag.
 void BM_test(benchmark::State& state) {
 ... body ...
 }
 BENCHMARK(BM_test)->MinTime(2.0); // Run for at least 2 seconds.
 In a multithreaded test, it is guaranteed that none of the threads will start
 until all have called KeepRunning, and all will have finished before KeepRunning
 returns false. As such, any global setup or teardown you want to do can be
 wrapped in a check against the thread index:
 static void BM_MultiThreaded(benchmark::State& state) {
  if (state.thread_index == 0) {
    // Setup code here.
  }
  while (state.KeepRunning()) {
    // Run the test as normal.
  }
  if (state.thread_index == 0) {
    // Teardown code here.
  }
 }
 BENCHMARK(BM_MultiThreaded)->Threads(4);
 If a benchmark runs a few milliseconds it may be hard to visually compare the
 measured times, since the output data is given in nanoseconds per default. In
 order to manually set the time unit, you can specify it manually:
 BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
 */
 #ifndef BENCHMARK_BENCHMARK_API_H_
 #define BENCHMARK_BENCHMARK_API_H_
 #include <assert.h>
 #include <stddef.h>
 #include <stdint.h>
 #include "macros.h"
 namespace benchmark {
 class BenchmarkReporter;
 void Initialize(int* argc, char** argv);
 // Generate a list of benchmarks matching the specified --benchmark_filter flag
 // and if --benchmark_list_tests is specified return after printing the name
 // of each matching benchmark. Otherwise run each matching benchmark and
 // report the results.
 //
 // The second overload reports the results using the specified 'reporter'.
 //
 // RETURNS: The number of matching benchmarks.
 size_t RunSpecifiedBenchmarks();
 size_t RunSpecifiedBenchmarks(BenchmarkReporter* reporter);
 // If this routine is called, peak memory allocation past this point in the
 // benchmark is reported at the end of the benchmark report line. (It is
 // computed by running the benchmark once with a single iteration and a memory
 // tracer.)
 // TODO(dominic)
 // void MemoryUsage();
 namespace internal {
 class Benchmark;
 class BenchmarkImp;
 class BenchmarkFamilies;
 // Maps any type T to void. Used below so that naming a nested type inside
 // Voider<...> is only well-formed when that nested type exists.
 template <class T> struct Voider {
    typedef void type;
 };
 // Primary template: has no nested `type`, so using EnableIfString<T>::type
 // in a signature removes that overload for types that do not match the
 // specialization below.
 template <class T, class = void>
 struct EnableIfString {};
 // Selected only when T has a nested type named `basic_string`; in that case
 // EnableIfString<T>::type is int.
 template <class T>
 struct EnableIfString<T, typename Voider<typename T::basic_string>::type> {
    typedef int type;
 };
 void UseCharPointer(char const volatile*);
 // Take ownership of the pointer and register the benchmark. Return the
 // registered benchmark.
 Benchmark* RegisterBenchmarkInternal(Benchmark*);
 } // end namespace internal
 // The DoNotOptimize(...) function can be used to prevent a value or
 // expression from being optimized away by the compiler. This function is
 // intended to add little to no overhead.
 // See: https://youtu.be/nXaxk27zwlk?t=2441
 #if defined(__GNUC__)
 // GNU-compatible compilers: the empty asm statement names `value` as an input
 // operand and clobbers "memory", so the compiler has to compute `value` and
 // cannot assume memory is unchanged across the statement.
 template <class Tp>
 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
    asm volatile("" : : "g"(value) : "memory");
 }
 // Force the compiler to flush pending writes to global memory. Acts as an
 // effective read/write barrier
 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
    asm volatile("" : : : "memory");
 }
 #else
 // Other compilers: pass the address of `value`, viewed as a
 // `char const volatile`, to internal::UseCharPointer(), which is only
 // declared in this header.
 template <class Tp>
 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
    internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
 }
 // FIXME Add ClobberMemory() for non-gnu compilers
 #endif
 // TimeUnit is passed to a benchmark in order to specify the order of magnitude
 // for the measured time.
 enum TimeUnit {
  kNanosecond,
  kMicrosecond,
  kMillisecond
 };
 // BigO is passed to a benchmark in order to specify the asymptotic computational 
 // complexity for the benchmark. In case oAuto is selected, complexity will be 
 // calculated automatically to the best fit.
 enum BigO {
  oNone,
  o1,
  oN,
  oNSquared,
  oNCubed,
  oLogN,
  oNLogN,
  oAuto,
  oLambda
 };
 // BigOFunc is passed to a benchmark in order to specify the asymptotic 
 // computational complexity for the benchmark.
 typedef double(BigOFunc)(int);
 // State is passed to a running Benchmark and contains state for the
 // benchmark to use.
 class State {
 public:
  State(size_t max_iters, bool has_x, int x, bool has_y, int y,
        int thread_i, int n_threads);
  // Returns true if the benchmark should continue through another iteration.
  // NOTE: A benchmark may not return from the test until KeepRunning() has
  // returned false.
  //
  // The first call marks the state as started and calls ResumeTiming(). Every
  // call counts one iteration; once the count reaches max_iterations the
  // function calls PauseTiming() (unless an error was reported), marks the
  // state as finished and returns false.
  bool KeepRunning() {
    // First call only: start the timer.
    if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
      assert(!finished_);
      started_ = true;
      ResumeTiming();
    }
    // Compare the count before incrementing, then increment unconditionally.
    bool const res = total_iterations_++ < max_iterations;
    if (BENCHMARK_BUILTIN_EXPECT(!res, false)) {
      assert(started_ && (!finished_ || error_occurred_));
      // PauseTiming() is skipped when an error has been recorded.
      if (!error_occurred_) {
        PauseTiming();
      }
      // Total iterations now is one greater than max iterations. Fix this.
      total_iterations_ = max_iterations;
      finished_ = true;
    }
    return res;
  }
  // REQUIRES: timer is running and 'SkipWithError(...)' has not been called
  //           by the current thread.
  // Stop the benchmark timer.  If not called, the timer will be
  // automatically stopped after KeepRunning() returns false for the first time.
  //
  // For threaded benchmarks the PauseTiming() function acts
  // like a barrier.  I.e., the ith call by a particular thread to this
  // function will block until all active threads have made their ith call.
  // The timer will stop when the last thread has called this function.
  //
  // NOTE: PauseTiming()/ResumeTiming() are relatively
  // heavyweight, and so their use should generally be avoided
  // within each benchmark iteration, if possible.
  void PauseTiming();
  // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called
  //           by the current thread.
  // Start the benchmark timer.  The timer is NOT running on entrance to the
  // benchmark function. It begins running after the first call to KeepRunning()
  //
  // For threaded benchmarks the ResumeTiming() function acts
  // like a barrier.  I.e., the ith call by a particular thread to this
  // function will block until all active threads have made their ith call.
  // The timer will start when the last thread has called this function.
  //
  // NOTE: PauseTiming()/ResumeTiming() are relatively
  // heavyweight, and so their use should generally be avoided
  // within each benchmark iteration, if possible.
  void ResumeTiming();
  // REQUIRES: 'SkipWithError(...)' has not been called previously by the
  //            current thread.
  // Skip any future iterations of the 'KeepRunning()' loop in the current
  // thread and report an error with the specified 'msg'. After this call
  // the user may explicitly 'return' from the benchmark.
  //
  // For threaded benchmarks only the current thread stops executing. If
  // multiple threads report an error only the first error message is used.
  // The current thread is no longer considered 'active' by
  // 'PauseTiming()' and 'ResumeTiming()'.
  //
  // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit
  // the current scope immediately. If the function is called from within
  // the 'KeepRunning()' loop the current iteration will finish. It is the users
  // responsibility to exit the scope as needed.
  void SkipWithError(const char* msg);
  // REQUIRES: called exactly once per iteration of the KeepRunning loop.
  // Set the manually measured time for this benchmark iteration, which
  // is used instead of automatically measured time if UseManualTime() was
  // specified.
  //
  // For threaded benchmarks the SetIterationTime() function acts
  // like a barrier.  I.e., the ith call by a particular thread to this
  // function will block until all threads have made their ith call.
  // The time will be set by the last thread to call this function.
  void SetIterationTime(double seconds);
  // Set the number of bytes processed by the current benchmark
  // execution.  This routine is typically called once at the end of a
  // throughput oriented benchmark.  If this routine is called with a
  // value > 0, the report is printed in MB/sec instead of nanoseconds
  // per iteration.
  //
  // REQUIRES: a benchmark has exited its KeepRunning loop.
  BENCHMARK_ALWAYS_INLINE
  void SetBytesProcessed(size_t bytes) {
    bytes_processed_ = bytes;
  }
  BENCHMARK_ALWAYS_INLINE
  size_t bytes_processed() const {
    return bytes_processed_;
  }
  // If this routine is called with complexity_n > 0 and complexity report is requested for the 
  // family benchmark, then current benchmark will be part of the computation and complexity_n will
  // represent the length of N.
  BENCHMARK_ALWAYS_INLINE
  void SetComplexityN(int complexity_n) {
    complexity_n_ = complexity_n;
  }
  // Returns the complexity N recorded by SetComplexityN(), converted to size_t.
  // Declared const so it can be called on a const State, like the
  // bytes_processed() and items_processed() accessors.
  BENCHMARK_ALWAYS_INLINE
  size_t complexity_length_n() const {
    return complexity_n_;
  }
  // If this routine is called with items > 0, then an items/s
  // label is printed on the benchmark report line for the currently
  // executing benchmark. It is typically called at the end of a processing
  // benchmark where a processing items/second output is desired.
  //
  // REQUIRES: a benchmark has exited its KeepRunning loop.
  BENCHMARK_ALWAYS_INLINE
  void SetItemsProcessed(size_t items) {
    items_processed_ = items;
  }
  BENCHMARK_ALWAYS_INLINE
  size_t items_processed() const {
    return items_processed_;
  }
  // If this routine is called, the specified label is printed at the
  // end of the benchmark report line for the currently executing
  // benchmark.  Example:
  //  static void BM_Compress(benchmark::State& state) {
  //    ...
  //    double compression = input_size / output_size;
  //    state.SetLabel(StringPrintf("compress:%.1f%%", 100.0*compression));
  //  }
  // Produces output that looks like:
  //  BM_Compress   50         50   14115038  compress:27.3%
  //
  // REQUIRES: a benchmark has exited its KeepRunning loop.
  void SetLabel(const char* label);
  // Allow the use of std::string without actually including <string>.
  // This function does not participate in overload resolution unless StringType
  // has the nested typename `basic_string`. This typename should be provided
  // as an injected class name in the case of std::string.
  template <class StringType>
  void SetLabel(StringType const & str,
                typename internal::EnableIfString<StringType>::type = 1) {
    this->SetLabel(str.c_str());
  }
  // Range arguments for this run. CHECKs if the argument has been set.
  BENCHMARK_ALWAYS_INLINE
  int range_x() const {
    assert(has_range_x_);
    ((void)has_range_x_); // Prevent unused warning.
    return range_x_;
  }
  BENCHMARK_ALWAYS_INLINE
  int range_y() const {
    assert(has_range_y_);
    ((void)has_range_y_); // Prevent unused warning.
    return range_y_;
  }
  BENCHMARK_ALWAYS_INLINE
  size_t iterations() const { return total_iterations_; }
 private:
  bool started_;
  bool finished_;
  size_t total_iterations_;
  bool has_range_x_;
  int range_x_;
  bool has_range_y_;
  int range_y_;
  size_t bytes_processed_;
  size_t items_processed_;
  int complexity_n_;
 public:
  // FIXME: Make this private somehow.
  bool error_occurred_;
 public:
  // Index of the executing thread. Values from [0, threads).
  const int thread_index;
  // Number of threads concurrently executing the benchmark.
  const int threads;
  const size_t max_iterations;
 private:
  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(State);
 };
 namespace internal {
 // Signature of a plain benchmark function: it receives the State by reference.
 typedef void(Function)(State&);
 // ------------------------------------------------------
 // Benchmark registration object.  The BENCHMARK() macro expands
 // into an internal::Benchmark* object.  Various methods can
 // be called on this object to change the properties of the benchmark.
 // Each method returns "this" so that multiple method calls can be
 // chained into one expression.
 class Benchmark {
 public:
  virtual ~Benchmark();
  // Note: the following methods all return "this" so that multiple
  // method calls can be chained together in one expression.
  // Run this benchmark once with "x" as the extra argument passed
  // to the function.
  // REQUIRES: The function passed to the constructor must accept an arg1.
  Benchmark* Arg(int x);
  // Run this benchmark with the given time unit for the generated output report
  Benchmark* Unit(TimeUnit unit);
  // Run this benchmark once for a number of values picked from the
  // range [start..limit].  (start and limit are always picked.)
  // REQUIRES: The function passed to the constructor must accept an arg1.
  Benchmark* Range(int start, int limit);
  // Run this benchmark once for every value in the range [start..limit]
  // REQUIRES: The function passed to the constructor must accept an arg1.
  Benchmark* DenseRange(int start, int limit);
  // Run this benchmark once with "x,y" as the extra arguments passed
  // to the function.
  // REQUIRES: The function passed to the constructor must accept arg1,arg2.
  Benchmark* ArgPair(int x, int y);
  // Pick a set of values A from the range [lo1..hi1] and a set
  // of values B from the range [lo2..hi2].  Run the benchmark for
  // every pair of values in the cartesian product of A and B
  // (i.e., for all combinations of the values in A and B).
  // REQUIRES: The function passed to the constructor must accept arg1,arg2.
  Benchmark* RangePair(int lo1, int hi1, int lo2, int hi2);
  // Pass this benchmark object to *func, which can customize
  // the benchmark by calling various methods like Arg, ArgPair,
  // Threads, etc.
  Benchmark* Apply(void (*func)(Benchmark* benchmark));
  // Set the range multiplier for non-dense ranges. If not called, the range
  // multiplier kRangeMultiplier will be used.
  Benchmark* RangeMultiplier(int multiplier);
  // Set the minimum amount of time to use when running this benchmark. This
  // option overrides the `benchmark_min_time` flag.
  // REQUIRES: `t > 0`
  Benchmark* MinTime(double t);
  // Specify the number of times to repeat this benchmark. This option overrides
  // the `benchmark_repetitions` flag.
  // REQUIRES: `n > 0`
  Benchmark* Repetitions(int n);
  // If a particular benchmark is I/O bound, runs multiple threads internally or
  // if for some reason CPU timings are not representative, call this method. If
  // called, the elapsed time will be used to control how many iterations are
  // run, and in the printing of items/second or MB/seconds values.  If not
  // called, the cpu time used by the benchmark will be used.
  Benchmark* UseRealTime();
  // If a benchmark must measure time manually (e.g. if GPU execution time is being
  // measured), call this method. If called, each benchmark iteration should call
  // SetIterationTime(seconds) to report the measured time, which will be used
  // to control how many iterations are run, and in the printing of items/second
  // or MB/second values.
  Benchmark* UseManualTime();
  // Set the asymptotic computational complexity for the benchmark. If called
  // the asymptotic computational complexity will be shown on the output.
  // With no argument the complexity defaults to benchmark::oAuto.
  Benchmark* Complexity(BigO complexity = benchmark::oAuto);
  // Set the asymptotic computational complexity for the benchmark from a
  // caller-supplied BigOFunc. If called the asymptotic computational
  // complexity will be shown on the output.
  Benchmark* Complexity(BigOFunc* complexity);
  // Support for running multiple copies of the same benchmark concurrently
  // in multiple threads.  This may be useful when measuring the scaling
  // of some piece of code.
  // Run one instance of this benchmark concurrently in t threads.
  Benchmark* Threads(int t);
  // Pick a set of values T from [min_threads,max_threads].
  // min_threads and max_threads are always included in T.  Run this
  // benchmark once for each value in T.  The benchmark run for a
  // particular value t consists of t threads running the benchmark
  // function concurrently.  For example, consider:
  //    BENCHMARK(Foo)->ThreadRange(1,16);
  // This will run the following benchmarks:
  //    Foo in 1 thread
  //    Foo in 2 threads
  //    Foo in 4 threads
  //    Foo in 8 threads
  //    Foo in 16 threads
  Benchmark* ThreadRange(int min_threads, int max_threads);
  // Equivalent to ThreadRange(NumCPUs(), NumCPUs())
  Benchmark* ThreadPerCpu();
  // The benchmark body; every concrete benchmark type must implement it.
  virtual void Run(State& state) = 0;
  // Used inside the benchmark implementation
  struct Instance;
 protected:
  // Construction is restricted to derived classes.
  explicit Benchmark(const char* name);
  Benchmark(Benchmark const&);
  void SetName(const char* name);
 private:
  friend class BenchmarkFamilies;
  // Implementation object; BenchmarkImp is declared outside this class.
  BenchmarkImp* imp_;
  // Copy assignment is private, so code outside the class cannot assign.
  Benchmark& operator=(Benchmark const&);
 };
 // Benchmark type that wraps a plain function pointer (`Function*`); this is
 // what the BENCHMARK(...) family of macros instantiates.
 class FunctionBenchmark : public Benchmark {
 public:
    // Names the benchmark `name` and remembers `func` as the wrapped function.
    FunctionBenchmark(const char* name, Function* func)
        : Benchmark(name), func_(func) {}
    // Implements Benchmark::Run; defined out of line.
    virtual void Run(State& st);
 private:
    Function* func_;
 };
 }  // end namespace internal
 // Base class for fixture-style benchmarks. Run() brackets the benchmark body
 // (BenchmarkCase) with the SetUp() and TearDown() hooks.
 class Fixture: public internal::Benchmark {
 public:
    // The name starts out empty.
    Fixture() : internal::Benchmark("") {}
    virtual void Run(State& st) {
      SetUp(st);
      BenchmarkCase(st);
      TearDown(st);
    }
    // Hooks called by Run() before and after the body; both do nothing by
    // default.
    virtual void SetUp(const State&) {}
    virtual void TearDown(const State&) {}
 protected:
    // The benchmark body, supplied by the derived class.
    virtual void BenchmarkCase(State&) = 0;
 };
 }  // end namespace benchmark
 // ------------------------------------------------------
 // Macros to register benchmarks
 //
 // BENCHMARK_PRIVATE_UNIQUE_ID yields a token that makes generated variable
 // names unique: __COUNTER__ when it is usable, otherwise __LINE__.
 // Check that __COUNTER__ is defined and that __COUNTER__ increases by 1
 // every time it is expanded. X + 1 == X + 0 is used in case X is defined to be
 // empty. If X is empty the expression becomes (+1 == +0).
 #if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0)
 #define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__
 #else
 #define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__
 #endif
 // Helpers for generating unique variable names.
 // BENCHMARK_PRIVATE_NAME(n) pastes together `_benchmark_`, the unique id and
 // `n`; the extra CONCAT level lets the unique id expand before `##` is applied.
 #define BENCHMARK_PRIVATE_NAME(n) \
    BENCHMARK_PRIVATE_CONCAT(_benchmark_, BENCHMARK_PRIVATE_UNIQUE_ID, n)
 #define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c)
 #define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c
 // Declares a static Benchmark* with a unique name, marked BENCHMARK_UNUSED.
 // The expansion ends right after the declarator so callers append
 // `= <initializer>`.
 #define BENCHMARK_PRIVATE_DECLARE(n)       \
  static ::benchmark::internal::Benchmark* \
  BENCHMARK_PRIVATE_NAME(n) BENCHMARK_UNUSED
 // BENCHMARK(n) wraps function `n` in a heap-allocated FunctionBenchmark named
 // "n", passes it to RegisterBenchmarkInternal and stores the result in the
 // generated static pointer. No trailing semicolon is emitted, so further
 // calls such as ->Arg(...) can be chained at the use site.
 #define BENCHMARK(n) \
    BENCHMARK_PRIVATE_DECLARE(n) =                               \
        (::benchmark::internal::RegisterBenchmarkInternal(       \
            new ::benchmark::internal::FunctionBenchmark(#n, n)))
 // Old-style macros: shorthands for BENCHMARK(n) followed by one chained call.
 #define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
 #define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->ArgPair((a1), (a2))
 #define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t))
 #define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
 #define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
  BENCHMARK(n)->RangePair((l1), (h1), (l2), (h2))
 #if __cplusplus >= 201103L
 // Register a benchmark which invokes the function specified by `func`
 // with the additional arguments specified by `...`.
 //
 // For example:
 //
 // template <class ...ExtraArgs>`
 // void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
 //  [...]
 //}
 // /* Registers a benchmark named "BM_takes_args/int_string_test` */
 // BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
 #define BENCHMARK_CAPTURE(func, test_case_name, ...)                       \
    BENCHMARK_PRIVATE_DECLARE(func) =                                      \
        (::benchmark::internal::RegisterBenchmarkInternal(                 \
            new ::benchmark::internal::FunctionBenchmark(                  \
                    #func "/" #test_case_name,                             \
                    [](::benchmark::State& st) { func(st, __VA_ARGS__); })))
 #endif // __cplusplus >= 11
 // This will register a benchmark for a templatized function.  For example:
 //
 // template<int arg>
 // void BM_Foo(benchmark::State& state);
 //
 // BENCHMARK_TEMPLATE(BM_Foo, 1);
 //
 // will register BM_Foo<1> as a benchmark.
 //
 // BENCHMARK_TEMPLATE1 / BENCHMARK_TEMPLATE2 take exactly one / two template
 // arguments; the registered name is the stringized "n<a>" / "n<a,b>".
 #define BENCHMARK_TEMPLATE1(n, a) \
  BENCHMARK_PRIVATE_DECLARE(n) =  \
      (::benchmark::internal::RegisterBenchmarkInternal( \
        new ::benchmark::internal::FunctionBenchmark(#n "<" #a ">", n<a>)))
 #define BENCHMARK_TEMPLATE2(n, a, b)                     \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
        new ::benchmark::internal::FunctionBenchmark(    \
            #n "<" #a "," #b ">", n<a, b>)))
 // With C++11 the generic form accepts any number of template arguments via
 // __VA_ARGS__; otherwise it falls back to the single-argument form.
 #if __cplusplus >= 201103L
 #define BENCHMARK_TEMPLATE(n, ...)           \
  BENCHMARK_PRIVATE_DECLARE(n) =             \
      (::benchmark::internal::RegisterBenchmarkInternal( \
        new ::benchmark::internal::FunctionBenchmark( \
        #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>)))
 #else
 #define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a)
 #endif
 // Declares the class BaseClass_Method_Benchmark, derived from BaseClass. Its
 // constructor sets the benchmark name to "BaseClass/Method" and it declares
 // (but does not define) the BenchmarkCase override.
 #define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method)      \
 class BaseClass##_##Method##_Benchmark : public BaseClass { \
 public:                                                     \
    BaseClass##_##Method##_Benchmark() : BaseClass() {      \
        this->SetName(#BaseClass "/" #Method);}             \
 protected:                                                  \
    virtual void BenchmarkCase(::benchmark::State&);        \
 };
 // Declares the fixture benchmark class and opens the definition of its
 // BenchmarkCase method: the use site continues with the parameter list and
 // body. The benchmark is NOT registered; pair with BENCHMARK_REGISTER_F.
 #define BENCHMARK_DEFINE_F(BaseClass, Method) \
    BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
    void BaseClass##_##Method##_Benchmark::BenchmarkCase
 // Registers a benchmark previously declared for (BaseClass, Method). Like
 // BENCHMARK(), the expansion has no trailing semicolon.
 #define BENCHMARK_REGISTER_F(BaseClass, Method) \
    BENCHMARK_PRIVATE_REGISTER_F(BaseClass##_##Method##_Benchmark)
 // Creates an instance of TestName with `new` and passes it to
 // RegisterBenchmarkInternal.
 #define BENCHMARK_PRIVATE_REGISTER_F(TestName) \
    BENCHMARK_PRIVATE_DECLARE(TestName) = \
        (::benchmark::internal::RegisterBenchmarkInternal(new TestName()))
 // This macro will define and register a benchmark within a fixture class.
 #define BENCHMARK_F(BaseClass, Method) \
    BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
    BENCHMARK_REGISTER_F(BaseClass, Method); \
    void BaseClass##_##Method##_Benchmark::BenchmarkCase
 // Helper macro to create a main routine in a test that runs the benchmarks.
 // The generated main() passes the command line to ::benchmark::Initialize and
 // then calls ::benchmark::RunSpecifiedBenchmarks(). It has no explicit return
 // statement, which for main() is equivalent to returning 0.
 #define BENCHMARK_MAIN()                   \
  int main(int argc, char** argv) {        \
    ::benchmark::Initialize(&argc, argv);  \
    ::benchmark::RunSpecifiedBenchmarks(); \
  }
 #endif  // BENCHMARK_BENCHMARK_API_H_
--- a/utils/google-benchmark/include/benchmark/macros.h
+++ b/utils/google-benchmark/include/benchmark/macros.h
@@ -0,0 +1,56 @@
 // Copyright 2015 Google Inc. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifndef BENCHMARK_MACROS_H_
 #define BENCHMARK_MACROS_H_
 // BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) declares the copy constructor
 // and copy assignment operator of TypeName. Before C++11 they are only
 // declared; from C++11 on they are explicitly deleted.
 // The expansion has no trailing semicolon, so the use site supplies it.
 #if __cplusplus < 201103L
 # define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName)  \
    TypeName(const TypeName&);                         \
    TypeName& operator=(const TypeName&)
 #else
 # define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName)  \
    TypeName(const TypeName&) = delete;                \
    TypeName& operator=(const TypeName&) = delete
 #endif
 // Compiler-specific spellings for:
 //   BENCHMARK_UNUSED          - marks a declaration as intentionally unused
 //   BENCHMARK_ALWAYS_INLINE   - forces inlining
 //   BENCHMARK_NOEXCEPT        - noexcept specifier, where supported
 //   BENCHMARK_NOEXCEPT_OP(x)  - noexcept(x), where supported
 // Three cases are distinguished: compilers defining __GNUC__, MSVC proper
 // (not clang), and everything else, where all four expand to nothing.
 // NOTE(review): the __GNUC__ branch expands to `noexcept` without checking
 // __cplusplus -- confirm these two macros are only used from C++11 code.
 #if defined(__GNUC__)
 # define BENCHMARK_UNUSED __attribute__((unused))
 # define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
 # define BENCHMARK_NOEXCEPT noexcept
 # define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
 #elif defined(_MSC_VER) && !defined(__clang__)
 # define BENCHMARK_UNUSED
 # define BENCHMARK_ALWAYS_INLINE __forceinline
 // noexcept is only used for _MSC_VER >= 1900.
 # if _MSC_VER >= 1900
 #  define BENCHMARK_NOEXCEPT noexcept
 #  define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
 # else
 #  define BENCHMARK_NOEXCEPT
 #  define BENCHMARK_NOEXCEPT_OP(x)
 # endif
 // NOTE(review): this redefines a reserved identifier for every file that
 // includes this header under MSVC -- confirm it is still needed.
 # define __func__ __FUNCTION__
 #else
 # define BENCHMARK_UNUSED
 # define BENCHMARK_ALWAYS_INLINE
 # define BENCHMARK_NOEXCEPT
 # define BENCHMARK_NOEXCEPT_OP(x)
 #endif
 // BENCHMARK_BUILTIN_EXPECT(x, y) evaluates to x while hinting to the compiler
 // that x is expected to equal y. Compilers without __builtin_expect get the
 // plain expression; it is parenthesized so that the macro behaves like a
 // single operand in both branches (the builtin form is a call expression).
 #if defined(__GNUC__)
 # define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
 #else
 # define BENCHMARK_BUILTIN_EXPECT(x, y) (x)
 #endif
 #endif  // BENCHMARK_MACROS_H_
--- a/utils/google-benchmark/include/benchmark/reporter.h
+++ b/utils/google-benchmark/include/benchmark/reporter.h
@@ -0,0 +1,216 @@
 // Copyright 2015 Google Inc. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifndef BENCHMARK_REPORTER_H_
 #define BENCHMARK_REPORTER_H_
 #include <cassert>
 #include <iosfwd>
 #include <string>
 #include <utility>
 #include <vector>
 #include "benchmark_api.h"  // For forward declaration of BenchmarkReporter
 namespace benchmark {
 // Interface for custom benchmark result printers.
 // By default, benchmark reports are printed to stdout. However an application
 // can control the destination of the reports by calling
 // RunSpecifiedBenchmarks and passing it a custom reporter object.
 // The reporter object must implement the following interface.
 class BenchmarkReporter {
 public:
  struct Context {
    int num_cpus;
    double mhz_per_cpu;
    bool cpu_scaling_enabled;
    // The number of chars in the longest benchmark name.
    size_t name_field_width;
  };
  struct Run {
    Run() :
      error_occurred(false),
      iterations(1),
      time_unit(kNanosecond),
      real_accumulated_time(0),
      cpu_accumulated_time(0),
      bytes_per_second(0),
      items_per_second(0),
      max_heapbytes_used(0),
      complexity(oNone),
      complexity_n(0),
      report_big_o(false),
      report_rms(false) {}
    std::string benchmark_name;
    std::string report_label;  // Empty if not set by benchmark.
    bool error_occurred;
    std::string error_message;
    int64_t iterations;
    TimeUnit time_unit;
    double real_accumulated_time;
    double cpu_accumulated_time;
    // Return a value representing the real time per iteration in the unit
    // specified by 'time_unit'.
    // NOTE: If 'iterations' is zero the returned value represents the
    // accumulated time.
    double GetAdjustedRealTime() const;
    // Return a value representing the cpu time per iteration in the unit
    // specified by 'time_unit'.
    // NOTE: If 'iterations' is zero the returned value represents the
    // accumulated time.
    double GetAdjustedCPUTime() const;
    // Zero if not set by benchmark.
    double bytes_per_second;
    double items_per_second;
    // This is set to 0.0 if memory tracing is not enabled.
    double max_heapbytes_used;
    // Keep track of arguments to compute asymptotic complexity
    BigO complexity;
    BigOFunc* complexity_lambda;
    int complexity_n;
    // Inform print function whether the current run is a complexity report
    bool report_big_o;
    bool report_rms;
  };
  // Construct a BenchmarkReporter with the output stream set to 'std::cout'
  // and the error stream set to 'std::cerr'
  BenchmarkReporter();
  // Called once for every suite of benchmarks run.
  // The parameter "context" contains information that the
  // reporter may wish to use when generating its report, for example the
  // platform under which the benchmarks are running. The benchmark run is
  // never started if this function returns false, allowing the reporter
  // to skip runs based on the context information.
  virtual bool ReportContext(const Context& context) = 0;
  // Called once for each group of benchmark runs, gives information about
  // cpu-time and heap memory usage during the benchmark run. If the group
  // of runs contained more than two entries then 'report' contains additional
  // elements representing the mean and standard deviation of those runs.
  // Additionally if this group of runs was the last in a family of benchmarks
  // 'reports' contains additional entries representing the asymptotic
  // complexity and RMS of that benchmark family.
  virtual void ReportRuns(const std::vector<Run>& report) = 0;
  // Called once and only once after ever group of benchmarks is run and
  // reported.
  virtual void Finalize() {}
  // REQUIRES: The object referenced by 'out' is valid for the lifetime
  // of the reporter.
  void SetOutputStream(std::ostream* out) {
    assert(out);
    output_stream_ = out;
  }
  // REQUIRES: The object referenced by 'err' is valid for the lifetime
  // of the reporter.
  void SetErrorStream(std::ostream* err) {
    assert(err);
    error_stream_ = err;
  }
  std::ostream& GetOutputStream() const {
    return *output_stream_;
  }
  std::ostream& GetErrorStream() const {
    return *error_stream_;
  }
  virtual ~BenchmarkReporter();
  // Write a human readable string to 'out' representing the specified
  // 'context'.
  // REQUIRES: 'out' is non-null.
  static void PrintBasicContext(std::ostream* out, Context const& context);
 private:
  std::ostream* output_stream_;
  std::ostream* error_stream_;
 };
 // Simple reporter that outputs benchmark data to the console. This is the
 // default reporter used by RunSpecifiedBenchmarks().
 class ConsoleReporter : public BenchmarkReporter {
 public:
  virtual bool ReportContext(const Context& context);
  virtual void ReportRuns(const std::vector<Run>& reports);
 protected:
  // Prints a single run; virtual and protected, so subclasses may override it.
  virtual void PrintRunData(const Run& report);
  // NOTE(review): no constructor visible here initializes this member;
  // presumably ReportContext() sets it from Context::name_field_width --
  // confirm it is never read before that.
  size_t name_field_width_;
 };
 // Reporter implementing the BenchmarkReporter interface; judging by its name
 // it emits JSON (the implementation is not part of this header).
 class JSONReporter : public BenchmarkReporter {
 public:
  JSONReporter() : first_report_(true) {}
  virtual bool ReportContext(const Context& context);
  virtual void ReportRuns(const std::vector<Run>& reports);
  virtual void Finalize();
 private:
  void PrintRunData(const Run& report);
  // Starts out true. NOTE(review): presumably cleared after the first call to
  // ReportRuns() so separators can be placed between reports -- confirm.
  bool first_report_;
 };
 // Reporter implementing the BenchmarkReporter interface; judging by its name
 // it emits CSV (the implementation is not part of this header). It keeps the
 // inherited no-op Finalize().
 class CSVReporter : public BenchmarkReporter {
 public:
  virtual bool ReportContext(const Context& context);
  virtual void ReportRuns(const std::vector<Run>& reports);
 private:
  void PrintRunData(const Run& report);
 };
 // Returns the abbreviation for `unit`: "ms" for kMillisecond, "us" for
 // kMicrosecond and "ns" for kNanosecond or any other value.
 inline const char* GetTimeUnitString(TimeUnit unit) {
  if (unit == kMillisecond) {
    return "ms";
  }
  if (unit == kMicrosecond) {
    return "us";
  }
  return "ns";
 }
 // Returns how many of `unit` make up one second: 1e3 for kMillisecond, 1e6
 // for kMicrosecond and 1e9 for kNanosecond or any other value.
 inline double GetTimeUnitMultiplier(TimeUnit unit) {
  if (unit == kMillisecond) {
    return 1e3;
  }
  if (unit == kMicrosecond) {
    return 1e6;
  }
  return 1e9;
 }
 }  // end namespace benchmark
 #endif  // BENCHMARK_REPORTER_H_
--- a/utils/google-benchmark/mingw.py
+++ b/utils/google-benchmark/mingw.py
@@ -0,0 +1,320 @@
 #! /usr/bin/env python
 # encoding: utf-8
 import argparse
 import errno
 import logging
 import os
 import platform
 import re
 import sys
 import subprocess
 import tempfile
 try:
    import winreg
 except ImportError:
    import _winreg as winreg
 try:
    import urllib.request as request
 except ImportError:
    import urllib as request
 try:
    import urllib.parse as parse
 except ImportError:
    import urlparse as parse
 class EmptyLogger(object):
    '''
    Null logger: offers the logging methods this script calls and silently
    discards every call
    '''
    def _ignore(self, *k, **kw):
        # Accept any arguments and do nothing
        pass
    # Every supported method shares the same no-op implementation
    debug = info = warn = error = critical = setLevel = _ignore
 # Default locations of the mingw-builds repository index (repository.txt).
 # repository() uses this tuple as the default for its `urls` argument and
 # requests each entry in turn.
 urls = (
    'http://downloads.sourceforge.net/project/mingw-w64/Toolchains%20'
        'targetting%20Win32/Personal%20Builds/mingw-builds/installer/'
        'repository.txt',
    'http://downloads.sourceforge.net/project/mingwbuilds/host-windows/'
        'repository.txt'
 )
 '''
 A list of mingw-build repositories
 '''
 def repository(urls = urls, log = EmptyLogger()):
    '''
    Downloads the mingw-builds repository files and parses them

    urls: iterable of repository.txt locations to request
    log: logger-like object that receives progress messages
    Returns nested dicts keyed, in order, by version tuple, arch, threading
    model, exception model and revision number; the innermost value is the
    download url
    '''
    log.info('getting mingw-builds repository')
    versions = {}
    re_sourceforge = re.compile(r'http://sourceforge.net/projects/([^/]+)/files')
    re_sub = r'http://downloads.sourceforge.net/project/\1'
    for url in urls:
        log.debug(' - requesting: %s', url)
        socket = request.urlopen(url)
        try:
            repo = socket.read()
        finally:
            # Release the connection even when reading fails
            socket.close()
        if not isinstance(repo, str):
            repo = repo.decode()
        # Each entry is one '|' separated line; the piece after the final
        # newline is dropped
        for entry in repo.split('\n')[:-1]:
            value = entry.split('|')
            version = tuple([int(n) for n in value[0].strip().split('.')])
            version = versions.setdefault(version, {})
            arch = value[1].strip()
            # Normalize the architecture names used by the repository file
            if arch == 'x32':
                arch = 'i686'
            elif arch == 'x64':
                arch = 'x86_64'
            arch = version.setdefault(arch, {})
            threading = arch.setdefault(value[2].strip(), {})
            exceptions = threading.setdefault(value[3].strip(), {})
            # The revision field carries a three character prefix (presumably
            # 'rev') in front of the number. setdefault keeps the first url
            # seen for a given revision.
            exceptions.setdefault(int(value[4].strip()[3:]),
                re_sourceforge.sub(re_sub, value[5].strip()))
    return versions
 def find_in_path(file, path=None):
    '''
    Looks for an executable called `file` in the given directories

    path may be a string of directories joined by os.pathsep or an iterable
    of directories; when omitted the PATH environment variable is used.
    On Windows '.exe' is appended to the name. Returns the list of candidate
    paths that exist, in search order.
    '''
    suffix = '.exe' if platform.system() == 'Windows' else ''
    target = file + suffix
    search = os.environ.get('PATH', '') if path is None else path
    if type(search) is str:
        search = search.split(os.pathsep)
    candidates = [os.path.join(folder, target) for folder in search]
    return [found for found in candidates if os.path.exists(found)]
 def find_7zip(log = EmptyLogger()):
    '''
    Locates the 7z executable used to unpack the mingw-build archives: first
    through find_in_path, then through the 'Path' value of the
    SOFTWARE\\7-Zip registry key. Returns the path of the executable.
    '''
    log.info('finding 7zip')
    found = find_in_path('7z')
    if not found:
        key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\7-Zip')
        install_dir, _ = winreg.QueryValueEx(key, 'Path')
        found = [os.path.join(install_dir, '7z.exe')]
    sevenzip = found[0]
    log.debug('found \'%s\'', sevenzip)
    return sevenzip
 # NOTE(review): this runs the 7zip lookup as soon as the module is loaded and
 # discards the result; unpack() performs its own lookup. It looks like
 # leftover debugging code -- confirm before removing.
 find_7zip()
 def unpack(archive, location, log = EmptyLogger()):
    '''
    Extracts a mingw-builds archive into `location` by running 7zip; the
    tool's standard output is discarded. A non-zero exit status raises
    subprocess.CalledProcessError.
    '''
    executable = find_7zip(log)
    log.info('unpacking %s', os.path.basename(archive))
    output_switch = '-o' + location
    command = [executable, 'x', archive, output_switch, '-y']
    log.debug(' - %r', command)
    with open(os.devnull, 'w') as sink:
        subprocess.check_call(command, stdout = sink)
 def download(url, location, log = EmptyLogger()):
    '''
    Downloads and unpacks a mingw-builds archive

    url: address of the archive to fetch
    location: folder that receives the archive and its unpacked contents;
        created when missing
    Returns the path of the unpacked 'mingw64' or 'mingw32' folder and raises
    ValueError when neither exists after unpacking
    '''
    log.info('downloading MinGW')
    log.debug(' - url: %s', url)
    log.debug(' - location: %s', location)
    re_content = re.compile(r'attachment;[ \t]*filename=(")?([^"]*)(")?[\r\n]*')
    stream = request.urlopen(url)
    try:
        # Fall back to the headers object when the response itself has no
        # getheader()
        try:
            content = stream.getheader('Content-Disposition') or ''
        except AttributeError:
            content = stream.headers.getheader('Content-Disposition') or ''
        matches = re_content.match(content)
        if matches:
            filename = matches.group(2)
        else:
            # No usable header: take the name from the final url
            parsed = parse.urlparse(stream.geturl())
            filename = os.path.basename(parsed.path)
        try:
            os.makedirs(location)
        except OSError as e:
            # An already existing directory is fine
            if e.errno == errno.EEXIST and os.path.isdir(location):
                pass
            else:
                raise
        archive = os.path.join(location, filename)
        with open(archive, 'wb') as out:
            while True:
                buf = stream.read(1024)
                if not buf:
                    break
                out.write(buf)
    finally:
        # Always release the connection, also when the transfer fails
        stream.close()
    unpack(archive, location, log = log)
    os.remove(archive)
    possible = os.path.join(location, 'mingw64')
    if not os.path.exists(possible):
        possible = os.path.join(location, 'mingw32')
        if not os.path.exists(possible):
            raise ValueError('Failed to find unpacked MinGW: ' + possible)
    return possible
 def root(location = None, arch = None, version = None, threading = None,
        exceptions = None, revision = None, log = EmptyLogger()):
    '''
    Returns the root folder of a specific version of the mingw-builds variant
    of gcc. Will download the compiler if needed

    Any of arch, version, threading, exceptions and revision that is not
    given is chosen with the help of the downloaded repository listing.
    Raises ValueError for an unknown arch or when the unpacked compiler is
    not found where it was expected.
    '''
    # The repository listing is always needed: it supplies the defaults and
    # the download url, which is looked up below even when the caller
    # specified every parameter.
    versions = repository(log = log)
    # Determine some defaults
    version = version or max(versions.keys())
    if not arch:
        arch = platform.machine().lower()
        if arch == 'x86':
            arch = 'i686'
        elif arch == 'amd64':
            arch = 'x86_64'
    if not threading:
        keys = versions[version][arch].keys()
        if 'posix' in keys:
            threading = 'posix'
        elif 'win32' in keys:
            threading = 'win32'
        else:
            # keys() cannot be indexed on Python 3, so take the first item
            # through an iterator
            threading = next(iter(keys))
    if not exceptions:
        keys = versions[version][arch][threading].keys()
        if 'seh' in keys:
            exceptions = 'seh'
        elif 'sjlj' in keys:
            exceptions = 'sjlj'
        else:
            exceptions = next(iter(keys))
    # Revision 0 is a valid choice, so only None selects the newest revision
    if revision is None:
        revision = max(versions[version][arch][threading][exceptions].keys())
    if not location:
        location = os.path.join(tempfile.gettempdir(), 'mingw-builds')
    # Get the download url
    url = versions[version][arch][threading][exceptions][revision]
    # Tell the user whatzzup
    log.info('finding MinGW %s', '.'.join(str(v) for v in version))
    log.debug(' - arch: %s', arch)
    log.debug(' - threading: %s', threading)
    log.debug(' - exceptions: %s', exceptions)
    log.debug(' - revision: %s', revision)
    log.debug(' - url: %s', url)
    # Store each specific revision differently
    slug = '{version}-{arch}-{threading}-{exceptions}-rev{revision}'
    slug = slug.format(
        version = '.'.join(str(v) for v in version),
        arch = arch,
        threading = threading,
        exceptions = exceptions,
        revision = revision
    )
    if arch == 'x86_64':
        root_dir = os.path.join(location, slug, 'mingw64')
    elif arch == 'i686':
        root_dir = os.path.join(location, slug, 'mingw32')
    else:
        raise ValueError('Unknown MinGW arch: ' + arch)
    # Download if needed
    if not os.path.exists(root_dir):
        downloaded = download(url, os.path.join(location, slug), log = log)
        if downloaded != root_dir:
            raise ValueError('The location of mingw did not match\n%s\n%s'
                % (downloaded, root_dir))
    return root_dir
 def str2ver(string):
    '''
    Converts a version string such as '4.9.2' into a tuple of three ints

    Raises argparse.ArgumentTypeError when the string does not consist of
    exactly three dot separated integers
    '''
    try:
        version = tuple(int(v) for v in string.split('.'))
        # Compare by value: identity checks against an int literal only work
        # by accident
        if len(version) != 3:
            raise ValueError()
    except ValueError:
        raise argparse.ArgumentTypeError(
            'please provide a three digit version string')
    return version
 def main():
    '''
    Command line entry point: parses the arguments, resolves the requested
    MinGW build through root() and writes its bin directory to stdout
    '''
    default_location = os.path.join(tempfile.gettempdir(), 'mingw-builds')
    parser = argparse.ArgumentParser(
        description = 'Downloads a specific version of MinGW',
        formatter_class = argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('--location', default = default_location,
        help = 'the location to download the compiler to')
    parser.add_argument('--arch', required = True, choices = ['i686', 'x86_64'],
        help = 'the target MinGW architecture string')
    parser.add_argument('--version', type = str2ver,
        help = 'the version of GCC to download')
    parser.add_argument('--threading', choices = ['posix', 'win32'],
        help = 'the threading type of the compiler')
    parser.add_argument('--exceptions', choices = ['sjlj', 'seh', 'dwarf'],
        help = 'the method to throw exceptions')
    parser.add_argument('--revision', type=int,
        help = 'the revision of the MinGW release')
    # -v and -q cannot be combined
    verbosity = parser.add_mutually_exclusive_group()
    verbosity.add_argument('-v', '--verbose', action='store_true',
        help='increase the script output verbosity')
    verbosity.add_argument('-q', '--quiet', action='store_true',
        help='only print errors and warning')
    args = parser.parse_args()
    # Pick the log level: INFO unless one of the verbosity flags was given
    if args.verbose:
        level = logging.DEBUG
    elif args.quiet:
        level = logging.WARN
    else:
        level = logging.INFO
    # Create the logger
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(message)s'))
    logger = logging.getLogger('mingw')
    logger.addHandler(handler)
    logger.setLevel(level)
    # Get MinGW
    root_dir = root(
        location = args.location,
        arch = args.arch,
        version = args.version,
        threading = args.threading,
        exceptions = args.exceptions,
        revision = args.revision,
        log = logger)
    sys.stdout.write('%s\n' % os.path.join(root_dir, 'bin'))
 # Script entry point: run main() and turn the failures handled below into a
 # one line message on stderr plus exit status 1.
 if __name__ == '__main__':
    try:
        main()
    # IOError is handled before OSError, so where the two are the same type
    # the 'IO error' message is the one that gets printed.
    except IOError as e:
        sys.stderr.write('IO error: %s\n' % e)
        sys.exit(1)
    except OSError as e:
        sys.stderr.write('OS error: %s\n' % e)
        sys.exit(1)
    # Interrupting the script reports 'Killed' instead of a traceback.
    except KeyboardInterrupt as e:
        sys.stderr.write('Killed\n')
        sys.exit(1)
--- a/utils/google-benchmark/src/CMakeLists.txt
+++ b/utils/google-benchmark/src/CMakeLists.txt
@@ -0,0 +1,51 @@
 # Define the source files
 set(SOURCE_FILES "benchmark.cc" "colorprint.cc" "commandlineflags.cc"
                 "console_reporter.cc" "csv_reporter.cc" "json_reporter.cc"
                 "log.cc" "reporter.cc" "sleep.cc" "string_util.cc"
                 "sysinfo.cc" "walltime.cc" "complexity.cc")
 # Determine the correct regular expression engine to use
 if(HAVE_STD_REGEX)
  set(RE_FILES "re_std.cc")
 elseif(HAVE_GNU_POSIX_REGEX)
  set(RE_FILES "re_posix.cc")
 elseif(HAVE_POSIX_REGEX)
  set(RE_FILES "re_posix.cc")
 else()
  message(FATAL_ERROR "Failed to determine the source files for the regular expression backend")
 endif()
 add_library(benchmark ${SOURCE_FILES} ${RE_FILES})
 # Allow the source files to find headers in src/. This is attached to the
 # target (and kept private) instead of to the whole directory.
 target_include_directories(benchmark PRIVATE "${PROJECT_SOURCE_DIR}/src")
 set_target_properties(benchmark PROPERTIES
  OUTPUT_NAME "benchmark"
  VERSION ${GENERIC_LIB_VERSION}
  SOVERSION ${GENERIC_LIB_SOVERSION}
 )
 # Link threads. PUBLIC keeps the dependency visible to consumers, as the
 # keyword-less signature did.
 target_link_libraries(benchmark PUBLIC ${CMAKE_THREAD_LIBS_INIT})
 # We need extra libraries on Windows. The variable is named rather than
 # expanded so that if() dereferences it exactly once and an empty value
 # cannot produce a malformed condition.
 if(CMAKE_SYSTEM_NAME MATCHES "Windows")
  target_link_libraries(benchmark PUBLIC Shlwapi)
 endif()
 # Expose public API
 target_include_directories(benchmark PUBLIC "${PROJECT_SOURCE_DIR}/include")
 # Install target (will install the library to specified CMAKE_INSTALL_PREFIX variable)
 install(
  TARGETS benchmark
  ARCHIVE DESTINATION lib
  LIBRARY DESTINATION lib
  RUNTIME DESTINATION bin
  COMPONENT library)
 # Install the public headers (files whose extension ends in 'h')
 install(
  DIRECTORY "${PROJECT_SOURCE_DIR}/include/benchmark"
  DESTINATION include
  FILES_MATCHING PATTERN "*.*h")
--- a/utils/google-benchmark/src/arraysize.h
+++ b/utils/google-benchmark/src/arraysize.h
@@ -0,0 +1,34 @@
 #ifndef BENCHMARK_ARRAYSIZE_H_
 #define BENCHMARK_ARRAYSIZE_H_
 #include <cstddef>  // std::size_t; keeps this header self-contained
 #include "internal_macros.h"
 namespace benchmark {
 namespace internal {
 // The arraysize(arr) macro returns the # of elements in an array arr.
 // The expression is a compile-time constant, and therefore can be
 // used in defining new arrays, for example.  If you use arraysize on
 // a pointer by mistake, you will get a compile-time error.
 //
 // This template function declaration is used in defining arraysize.
 // Note that the function doesn't need an implementation, as we only
 // use its type: it returns a reference to a char array with N elements,
 // so sizeof() of a call expression yields N.
 template <typename T, std::size_t N>
 char (&ArraySizeHelper(T (&array)[N]))[N];
 // That gcc wants both of these prototypes seems mysterious. VC, for
 // its part, can't decide which to use (another mystery). Matching of
 // template overloads: the final frontier.
 // NOTE(review): COMPILER_MSVC is presumably provided by internal_macros.h.
 #ifndef COMPILER_MSVC
 template <typename T, std::size_t N>
 char (&ArraySizeHelper(const T (&array)[N]))[N];
 #endif
 #define arraysize(array) (sizeof(::benchmark::internal::ArraySizeHelper(array)))
 } // end namespace internal
 } // end namespace benchmark
 #endif // BENCHMARK_ARRAYSIZE_H_
--- a/utils/google-benchmark/src/benchmark.cc
+++ b/utils/google-benchmark/src/benchmark.cc
--- a/utils/google-benchmark/src/check.h
+++ b/utils/google-benchmark/src/check.h
@@ -0,0 +1,72 @@
 #ifndef CHECK_H_
 #define CHECK_H_
 #include <cstdlib>
 #include <ostream>
 #include "internal_macros.h"
 #include "log.h"
 namespace benchmark {
 namespace internal {
 // Function type of the hook that runs when a CHECK fails: no arguments, no
 // result.
 using AbortHandlerT = void();
 // Returns a mutable reference to the installed abort hook so that callers can
 // both read and replace it. The hook lives in a function-local static and
 // initially points at std::abort.
 inline AbortHandlerT*& GetAbortHandler() {
   static AbortHandlerT* installed_handler = &std::abort;
   return installed_handler;
 }
 // Runs the currently installed abort hook. The function is declared noreturn,
 // so std::abort() is called afterwards in case the hook itself returns.
 BENCHMARK_NORETURN inline void CallAbortHandler() {
   AbortHandlerT* const installed = GetAbortHandler();
   installed();
   std::abort();  // only reached when the hook returned
 }
 // CheckHandler is the class constructed by failing CHECK macros. CheckHandler
 // will log information about the failures and abort when it is destructed.
 //
 // The object is meant to be used as a temporary: the caller streams extra
 // details into GetLog() and, when the temporary is destroyed at the end of the
 // full expression, the destructor terminates the message and calls
 // CallAbortHandler().
 class CheckHandler {
 public:
  // Binds to the stream returned by GetErrorLogInstance() and writes the
  // message prefix "<file>:<line>: <func>: Check `<check>' failed. ".
  //   check - text of the failed condition
  //   file  - source file of the failing CHECK
  //   func  - enclosing function of the failing CHECK
  //   line  - source line of the failing CHECK
  CheckHandler(const char* check, const char* file, const char* func, int line)
    : log_(GetErrorLogInstance())
  {
    log_ << file << ":" << line << ": " << func << ": Check `"
          << check << "' failed. ";
  }
  // Returns the stream so that the CHECK call site can append its own text.
  std::ostream& GetLog() {
    return log_;
  }
  // Ends the log line (std::endl also flushes) and then calls
  // CallAbortHandler(), which does not return.
  BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) {
      log_ << std::endl;
      CallAbortHandler();
  }
  // Not copyable, not assignable and not default-constructible: an instance
  // always describes exactly one failed check.
  CheckHandler & operator=(const CheckHandler&) = delete;
  CheckHandler(const CheckHandler&) = delete;
  CheckHandler() = delete;
 private:
  // Stream that receives the failure message; not owned by this object.
  std::ostream& log_;
 };
 } // end namespace internal
 } // end namespace benchmark
 // The CHECK macro returns a std::ostream object that can have extra information
 // written to it.
 //
 // With NDEBUG undefined, a passing condition yields the null log (streamed
 // text is discarded) and a failing condition constructs a temporary
 // CheckHandler whose destructor reports the failure and aborts. With NDEBUG
 // defined, the condition is not evaluated at all and the null log is returned.
 //
 // The condition is wrapped in parentheses inside the conditional so that
 // arguments containing low-precedence operators (a nested ?:, an assignment,
 // a comma) are evaluated as one expression.
 #ifndef NDEBUG
 # define CHECK(b)  ((b) ? ::benchmark::internal::GetNullLogInstance()      \
                        : ::benchmark::internal::CheckHandler(             \
                            #b, __FILE__, __func__, __LINE__).GetLog())
 #else
 # define CHECK(b) ::benchmark::internal::GetNullLogInstance()
 #endif
 // Comparison shorthands; each operand is parenthesized before comparing.
 #define CHECK_EQ(a, b) CHECK((a) == (b))
 #define CHECK_NE(a, b) CHECK((a) != (b))
 #define CHECK_GE(a, b) CHECK((a) >= (b))
 #define CHECK_LE(a, b) CHECK((a) <= (b))
 #define CHECK_GT(a, b) CHECK((a) > (b))
 #define CHECK_LT(a, b) CHECK((a) < (b))
 #endif  // CHECK_H_
--- a/utils/google-benchmark/src/colorprint.cc
+++ b/utils/google-benchmark/src/colorprint.cc
@@ -0,0 +1,158 @@
 // Copyright 2015 Google Inc. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "colorprint.h"
 #include <cstdarg>
 #include <cstdio>
 #include <cstdarg>
 #include <string>
 #include <memory>
 #include "commandlineflags.h"
 #include "check.h"
 #include "internal_macros.h"
 #ifdef BENCHMARK_OS_WINDOWS
 #include <Windows.h>
 #endif
 DECLARE_bool(color_print);
 namespace benchmark {
 namespace {
 // Platform representation of a color: a console attribute word on Windows,
 // otherwise the digit that is spliced into an ANSI escape sequence.
 #ifdef BENCHMARK_OS_WINDOWS
 using PlatformColorCode = WORD;
 #else
 using PlatformColorCode = const char*;
 #endif
 // Translates a LogColor into the platform representation.
 // On Windows, COLOR_WHITE and every value without an explicit mapping yield 0.
 // Elsewhere, every value without an explicit mapping yields nullptr.
 PlatformColorCode GetPlatformColorCode(LogColor color) {
 #ifdef BENCHMARK_OS_WINDOWS
   PlatformColorCode attributes = 0;
   switch (color) {
     case COLOR_RED:
       attributes = FOREGROUND_RED;
       break;
     case COLOR_GREEN:
       attributes = FOREGROUND_GREEN;
       break;
     case COLOR_YELLOW:
       attributes = FOREGROUND_RED | FOREGROUND_GREEN;
       break;
     case COLOR_BLUE:
       attributes = FOREGROUND_BLUE;
       break;
     case COLOR_MAGENTA:
       attributes = FOREGROUND_BLUE | FOREGROUND_RED;
       break;
     case COLOR_CYAN:
       attributes = FOREGROUND_BLUE | FOREGROUND_GREEN;
       break;
     default:  // COLOR_WHITE and everything else keep 0
       break;
   }
   return attributes;
 #else
   PlatformColorCode digit = nullptr;
   switch (color) {
     case COLOR_RED:
       digit = "1";
       break;
     case COLOR_GREEN:
       digit = "2";
       break;
     case COLOR_YELLOW:
       digit = "3";
       break;
     case COLOR_BLUE:
       digit = "4";
       break;
     case COLOR_MAGENTA:
       digit = "5";
       break;
     case COLOR_CYAN:
       digit = "6";
       break;
     case COLOR_WHITE:
       digit = "7";
       break;
     default:  // no mapping: stays nullptr
       break;
   }
   return digit;
 #endif
 }
 }  // end namespace
 // Formats 'msg' printf-style with the arguments in 'args' and returns the
 // result as a std::string.
 //
 // A 256-byte stack buffer is tried first; if the expansion does not fit, a
 // heap buffer of exactly the required size is used for a second pass.
 // A formatting failure (negative vsnprintf result) trips a CHECK; in builds
 // where CHECK is compiled out, an empty string is returned instead of sizing
 // a buffer from the negative value.
 std::string FormatString(const char *msg, va_list args) {
   // The first pass works on a copy so that 'args' is still untouched if a
   // second pass is needed.
   va_list args_cp;
   va_copy(args_cp, args);
   char local_buff[256];
   const std::size_t local_size = sizeof(local_buff);
   auto ret = std::vsnprintf(local_buff, local_size, msg, args_cp);
   va_end(args_cp);
   // currently there is no error handling for failure, so this is hack.
   CHECK(ret >= 0);
   // ret == 0: empty expansion. ret < 0: formatting error that only gets here
   // when CHECK is disabled; do not derive a buffer size from it.
   if (ret <= 0) return {};
   if (static_cast<std::size_t>(ret) < local_size) return local_buff;
   // The stack buffer was too small; 'ret' is the length that is needed.
   const std::size_t size = static_cast<std::size_t>(ret) + 1;  // + 1 for the null byte
   std::unique_ptr<char[]> buff(new char[size]);
   ret = std::vsnprintf(buff.get(), size, msg, args);
   CHECK(ret > 0 && ((size_t)ret) < size);
   // On failure the buffer contents are unspecified, so do not read them.
   if (ret < 0) return {};
   return buff.get();
 }
 // Variadic convenience overload: collects the trailing arguments into a
 // va_list and forwards to the va_list overload of FormatString.
 std::string FormatString(const char *msg, ...) {
   va_list ap;
   va_start(ap, msg);
   std::string formatted = FormatString(msg, ap);
   va_end(ap);
   return formatted;
 }
 // Prints a printf-style formatted message in the requested color.
 //   out   - destination stream (see the Windows note below)
 //   color - color to print in
 //   fmt   - printf-style format string, followed by its arguments
 // When FLAGS_color_print is false the formatted text is written to 'out'
 // without any color handling.
 // NOTE: the Windows branch prints with vprintf() and flushes stdout, i.e. it
 // writes to stdout and does not use 'out'.
 void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...) {
  va_list args;
  va_start(args, fmt);
  // Color disabled: plain formatted output, then release the va_list.
  if (!FLAGS_color_print) {
    out << FormatString(fmt, args);
    va_end(args);
    return;
  }
 #ifdef BENCHMARK_OS_WINDOWS
  const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE);
  // Gets the current text color.
  CONSOLE_SCREEN_BUFFER_INFO buffer_info;
  GetConsoleScreenBufferInfo(stdout_handle, &buffer_info);
  const WORD old_color_attrs = buffer_info.wAttributes;
  // We need to flush the stream buffers into the console before each
  // SetConsoleTextAttribute call lest it affect the text that is already
  // printed but has not yet reached the console.
  fflush(stdout);
  SetConsoleTextAttribute(stdout_handle,
                          GetPlatformColorCode(color) | FOREGROUND_INTENSITY);
  vprintf(fmt, args);
  fflush(stdout);
  // Restores the text color.
  SetConsoleTextAttribute(stdout_handle, old_color_attrs);
 #else
  // Emit the ANSI color prefix only when the color has a code; the reset
  // sequence "\033[m" is appended after the message in either case.
  const char* color_code = GetPlatformColorCode(color);
  if (color_code) out << FormatString("\033[0;3%sm", color_code);
  out << FormatString(fmt, args) << "\033[m";
 #endif
  va_end(args);
 }
 }  // end namespace benchmark
--- a/utils/google-benchmark/src/colorprint.h
+++ b/utils/google-benchmark/src/colorprint.h
@@ -0,0 +1,27 @@
 #ifndef BENCHMARK_COLORPRINT_H_
 #define BENCHMARK_COLORPRINT_H_
 #include <cstdarg>
 #include <string>
 #include <iostream>
 namespace benchmark {
 // Colors accepted by ColorPrintf.
 enum LogColor {
  COLOR_DEFAULT,
  COLOR_RED,
  COLOR_GREEN,
  COLOR_YELLOW,
  COLOR_BLUE,
  COLOR_MAGENTA,
  COLOR_CYAN,
  COLOR_WHITE
 };
 // Formats 'msg' printf-style using the arguments held in 'args' and returns
 // the result as a string.
 std::string FormatString(const char* msg, va_list args);
 // Same as above, taking the arguments as a variadic list.
 std::string FormatString(const char* msg, ...);
 // Prints the printf-style formatted message 'fmt' in the given color.
 void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...);
 }  // end namespace benchmark
 #endif  // BENCHMARK_COLORPRINT_H_
--- a/utils/google-benchmark/src/commandlineflags.cc
+++ b/utils/google-benchmark/src/commandlineflags.cc
@@ -0,0 +1,220 @@
 // Copyright 2015 Google Inc. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "commandlineflags.h"
 #include <cctype>
 #include <cerrno>
 #include <cstdlib>
 #include <cstring>
 #include <iostream>
 #include <limits>
 namespace benchmark {
 // Parses 'str' for a 32-bit signed integer.  If successful, writes
 // the result to *value and returns true; otherwise leaves *value
 // unchanged and returns false.
 //   src_text - description of where the text came from; used as the subject
 //              of the diagnostics written to std::cerr
 //   str      - NUL-terminated text to parse as a decimal number
 //   value    - receives the parsed number on success
 bool ParseInt32(const std::string& src_text, const char* str, int32_t* value) {
   // Parse the text as a decimal integer. errno is cleared first so that a
   // range error can be attributed to this strtol() call, and it is sampled
   // right away, before any stream output can disturb it.
   char* end = nullptr;
   errno = 0;
   const long long_value = strtol(str, &end, 10);  // NOLINT
   const bool overflows_long = (errno == ERANGE);
   // Has strtol() consumed all characters in the string?
   if (*end != '\0') {
     // No - an invalid character was encountered.
     std::cerr << src_text << " is expected to be a 32-bit integer, "
               << "but actually has value \"" << str << "\".\n";
     return false;
   }
   // Is the parsed value in the range of an Int32?
   // Relying on ERANGE rather than comparing against LONG_MAX/LONG_MIN keeps
   // the extreme values themselves valid where long is only 32 bits wide.
   const int32_t result = static_cast<int32_t>(long_value);
   if (overflows_long ||
       // The parsed value overflows as a long.
       result != long_value
       // The parsed value overflows as an Int32.
       ) {
     std::cerr << src_text << " is expected to be a 32-bit integer, "
               << "but actually has value \"" << str << "\", "
               << "which overflows.\n";
     return false;
   }
   *value = result;
   return true;
 }
 // Parses 'str' for a double.  On success the number is stored in *value and
 // true is returned; otherwise *value is left untouched, a diagnostic naming
 // 'src_text' is written to std::cerr and false is returned.
 bool ParseDouble(const std::string& src_text, const char* str, double* value) {
   // Convert the text as a floating point number, remembering where
   // strtod() stopped reading.
   char* stop = nullptr;
   const double parsed = strtod(str, &stop);  // NOLINT
   // The text is only valid if strtod() consumed every character.
   const bool fully_consumed = (*stop == '\0');
   if (!fully_consumed) {
     std::cerr << src_text << " is expected to be a double, "
               << "but actually has value \"" << str << "\".\n";
     return false;
   }
   *value = parsed;
   return true;
 }
 // Looks up the environment variable 'name' with getenv() and returns its
 // value, or nullptr when it is not set.
 inline const char* GetEnv(const char* name) {
   const char* const raw = getenv(name);
 #if defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9)
   // Environment variables which we programmatically clear will be set to the
   // empty string rather than unset (nullptr).  Report those as unset too.
   if (raw != nullptr && raw[0] == '\0') return nullptr;
 #endif
   return raw;
 }
 // Returns the name of the environment variable corresponding to the
 // given flag: the flag name converted to upper case and prefixed with
 // "BENCHMARK_".  For example, FlagToEnvVar("foo") returns "BENCHMARK_FOO".
 //   flag - NUL-terminated flag name; must not be nullptr
 static std::string FlagToEnvVar(const char* flag) {
   std::string env_var("BENCHMARK_");
   for (const char* p = flag; *p != '\0'; ++p) {
     // toupper() is only defined for values representable as unsigned char
     // (or EOF), so the character must not be passed as a possibly negative
     // plain char.
     env_var += static_cast<char>(std::toupper(static_cast<unsigned char>(*p)));
   }
   return env_var;
 }
 // Returns the Boolean value of the environment variable that belongs to
 // 'flag', or default_value when that variable is not set.
 //
 // Any value other than the exact string "0" counts as true.
 bool BoolFromEnv(const char* flag, bool default_value) {
   const std::string env_var = FlagToEnvVar(flag);
   const char* const raw = GetEnv(env_var.c_str());
   if (raw == nullptr) return default_value;
   return strcmp(raw, "0") != 0;
 }
 // Returns the 32-bit integer stored in the environment variable that belongs
 // to 'flag'.  default_value is returned when the variable is not set or when
 // its text is not a valid 32-bit integer; in the latter case a note about the
 // fallback is printed to std::cout.
 int32_t Int32FromEnv(const char* flag, int32_t default_value) {
   const std::string env_var = FlagToEnvVar(flag);
   const char* const raw = GetEnv(env_var.c_str());
   // The environment variable is not set.
   if (raw == nullptr) return default_value;
   int32_t parsed = default_value;
   const std::string source = std::string("Environment variable ") + env_var;
   if (ParseInt32(source, raw, &parsed)) return parsed;
   std::cout << "The default value " << default_value << " is used.\n";
   return default_value;
 }
 // Returns the text of the environment variable that belongs to 'flag', or
 // default_value when that variable is not set.
 const char* StringFromEnv(const char* flag, const char* default_value) {
   const std::string env_var = FlagToEnvVar(flag);
   const char* const found = GetEnv(env_var.c_str());
   if (found != nullptr) return found;
   return default_value;
 }
 // Matches 'str' against the command line flag 'flag'.  The expected form is
 // "--flag=value"; when def_optional is true the bare form "--flag" is
 // accepted as well.
 //
 // Returns a pointer to the value inside 'str' (the text after '='), a pointer
 // to the terminating NUL for an accepted bare flag, or nullptr when 'str'
 // does not match.
 const char* ParseFlagValue(const char* str, const char* flag,
                            bool def_optional) {
   // Neither argument may be nullptr.
   if (str == nullptr || flag == nullptr) return nullptr;
   // 'str' has to begin with "--" followed by the flag name.
   const std::string prefix = std::string("--") + std::string(flag);
   if (strncmp(str, prefix.c_str(), prefix.length()) != 0) return nullptr;
   // First character after the flag name.
   const char* const rest = str + prefix.length();
   // "--flag=value": hand back the text after '='.
   if (rest[0] == '=') return rest + 1;
   // "--flag" without a value is only acceptable when the value is optional.
   if (rest[0] == '\0' && def_optional) return rest;
   // Anything else (a longer flag name, or a missing mandatory value).
   return nullptr;
 }
 // Parses 'str' as the Boolean flag 'flag' ("--flag" or "--flag=value").
 // Returns false, leaving *value untouched, when 'str' is not that flag.
 // Otherwise stores false in *value if the value text starts with '0', 'f' or
 // 'F', stores true in every other case (including the bare "--flag"), and
 // returns true.
 bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
   // The value part is optional for Boolean flags.
   const char* const text = ParseFlagValue(str, flag, true);
   if (text == nullptr) return false;
   const char first = *text;
   const bool spelled_false = (first == '0' || first == 'f' || first == 'F');
   *value = !spelled_false;
   return true;
 }
 // Parses 'str' as the 32-bit integer flag 'flag' in the form "--flag=value".
 // Returns false when 'str' is not that flag or when the value cannot be
 // parsed by ParseInt32; otherwise stores the number in *value and returns
 // true.
 bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) {
   // The "=value" part is mandatory here.
   const char* const text = ParseFlagValue(str, flag, false);
   if (text == nullptr) return false;
   // The description only appears in diagnostics printed by ParseInt32.
   const std::string source = std::string("The value of flag --") + flag;
   return ParseInt32(source, text, value);
 }
 // Parses 'str' as the floating point flag 'flag' in the form "--flag=value".
 // Returns false when 'str' is not that flag or when the value cannot be
 // parsed by ParseDouble; otherwise stores the number in *value and returns
 // true.
 bool ParseDoubleFlag(const char* str, const char* flag, double* value) {
   // The "=value" part is mandatory here.
   const char* const text = ParseFlagValue(str, flag, false);
   if (text == nullptr) return false;
   // The description only appears in diagnostics printed by ParseDouble.
   const std::string source = std::string("The value of flag --") + flag;
   return ParseDouble(source, text, value);
 }
 // Parses 'str' as the string flag 'flag' in the form "--flag=value".
 // Returns false, leaving *value untouched, when 'str' is not that flag;
 // otherwise copies the text after '=' into *value and returns true.
 bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
   // The "=value" part is mandatory here.
   const char* const text = ParseFlagValue(str, flag, false);
   if (text == nullptr) return false;
   value->assign(text);
   return true;
 }
 // Returns true if 'str' is the flag 'flag', written either as "--flag" or as
 // "--flag=value".
 bool IsFlag(const char* str, const char* flag) {
   const char* const match = ParseFlagValue(str, flag, true);
   return match != nullptr;
 }
 }  // end namespace benchmark
--- a/utils/google-benchmark/src/commandlineflags.h
+++ b/utils/google-benchmark/src/commandlineflags.h
@@ -0,0 +1,76 @@
 #ifndef BENCHMARK_COMMANDLINEFLAGS_H_
 #define BENCHMARK_COMMANDLINEFLAGS_H_
 #include <cstdint>
 #include <string>
 // Macro for referencing flags: FLAG(foo) names the variable FLAGS_foo.
 #define FLAG(name) FLAGS_##name
 // Macros for declaring flags. Each expands to an extern declaration of the
 // flag variable with the given type, for use in files that read a flag which
 // is defined elsewhere. The caller supplies the trailing semicolon.
 #define DECLARE_bool(name) extern bool FLAG(name)
 #define DECLARE_int32(name) extern int32_t FLAG(name)
 #define DECLARE_int64(name) extern int64_t FLAG(name)
 #define DECLARE_double(name) extern double FLAG(name)
 #define DECLARE_string(name) extern std::string FLAG(name)
 // Macros for defining flags. Each expands to the definition of the flag
 // variable, initialized with default_val. The 'doc' argument is not used by
 // the expansion; it only documents the flag at the definition site.
 #define DEFINE_bool(name, default_val, doc) bool FLAG(name) = (default_val)
 #define DEFINE_int32(name, default_val, doc) int32_t FLAG(name) = (default_val)
 #define DEFINE_int64(name, default_val, doc) int64_t FLAG(name) = (default_val)
 #define DEFINE_double(name, default_val, doc) double FLAG(name) = (default_val)
 #define DEFINE_string(name, default_val, doc) \
  std::string FLAG(name) = (default_val)
 namespace benchmark {
 // Parses 'str' for a 32-bit signed integer.  If successful, writes the result
 // to *value and returns true; otherwise leaves *value unchanged and returns
 // false. 'src_text' describes the origin of the text and is used in the
 // diagnostics printed on failure.
 bool ParseInt32(const std::string& src_text, const char* str, int32_t* value);
 // Parses a bool/Int32/double/string from the environment variable
 // corresponding to the given benchmark flag; default_val is returned when
 // the variable is not set.
 // NOTE(review): no definition of DoubleFromEnv is visible in
 // commandlineflags.cc -- confirm that one exists before calling it.
 bool BoolFromEnv(const char* flag, bool default_val);
 int32_t Int32FromEnv(const char* flag, int32_t default_val);
 double DoubleFromEnv(const char* flag, double default_val);
 const char* StringFromEnv(const char* flag, const char* default_val);
 // Parses a string for a bool flag, in the form of either
 // "--flag=value" or "--flag".
 //
 // In the former case, the value is taken as true as long as it does
 // not start with '0', 'f', or 'F'.
 //
 // In the latter case, the value is taken as true.
 //
 // On success, stores the value of the flag in *value, and returns
 // true.  On failure, returns false without changing *value.
 bool ParseBoolFlag(const char* str, const char* flag, bool* value);
 // Parses a string for an Int32 flag, in the form of
 // "--flag=value".
 //
 // On success, stores the value of the flag in *value, and returns
 // true.  On failure, returns false without changing *value.
 bool ParseInt32Flag(const char* str, const char* flag, int32_t* value);
 // Parses a string for a Double flag, in the form of
 // "--flag=value".
 //
 // On success, stores the value of the flag in *value, and returns
 // true.  On failure, returns false without changing *value.
 bool ParseDoubleFlag(const char* str, const char* flag, double* value);
 // Parses a string for a string flag, in the form of
 // "--flag=value".
 //
 // On success, stores the value of the flag in *value, and returns
 // true.  On failure, returns false without changing *value.
 bool ParseStringFlag(const char* str, const char* flag, std::string* value);
 // Returns true if the string matches the flag, i.e. it has the form
 // "--flag" or "--flag=value".
 bool IsFlag(const char* str, const char* flag);
 }  // end namespace benchmark
 #endif  // BENCHMARK_COMMANDLINEFLAGS_H_
--- a/utils/google-benchmark/src/complexity.cc
+++ b/utils/google-benchmark/src/complexity.cc
@@ -0,0 +1,283 @@
 // Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 // Source project : https://github.com/ismaelJimenez/cpp.leastsq
 // Adapted to be used with google benchmark
 #include "benchmark/benchmark_api.h"
 #include <algorithm>
 #include <cmath>
 #include "check.h"
 #include "complexity.h"
 #include "stat.h"
 namespace benchmark {
 // Internal function to calculate the different scalability forms.
 // Returns the curve g(n) that belongs to the given complexity class; o1 and
 // every value without a dedicated curve map to the constant 1.0.
 //
 // The polynomial curves convert n to double before multiplying: the products
 // n * n and n * n * n do not fit into an int for moderately large n, and
 // signed integer overflow is undefined behavior.
 // (Assumes BigOFunc is a function type taking an int and returning a double,
 // as the lambdas below imply -- the typedef itself is declared elsewhere.)
 BigOFunc* FittingCurve(BigO complexity) {
   switch (complexity) {
     case oN:
       return [](int n) -> double { return n; };
     case oNSquared:
       return [](int n) -> double {
         const double x = n;
         return x * x;
       };
     case oNCubed:
       return [](int n) -> double {
         const double x = n;
         return x * x * x;
       };
     case oLogN:
       return [](int n) -> double { return std::log2(n); };
     case oNLogN:
       return [](int n) -> double { return n * std::log2(n); };
     case o1:
     default:
       return [](int) -> double { return 1.0; };
   }
 }
 // Returns the label used to print the given complexity class. Values without
 // a dedicated label are reported as "f(N)".
 std::string GetBigOString(BigO complexity) {
   const char* label = "f(N)";
   switch (complexity) {
     case oN:
       label = "N";
       break;
     case oNSquared:
       label = "N^2";
       break;
     case oNCubed:
       label = "N^3";
       break;
     case oLogN:
       label = "lgN";
       break;
     case oNLogN:
       label = "NlgN";
       break;
     case o1:
       label = "(1)";
       break;
     default:  // keeps "f(N)"
       break;
   }
   return label;
 }
 // Find the coefficient for the high-order term in the running time, by
 // minimizing the sum of squares of relative error, for the fitting curve
 // given by the lambda expression.
 //   - n             : Vector containing the size of the benchmark tests.
 //   - time          : Vector containing the times for the benchmark tests.
 //   - fitting_curve : lambda expression (e.g. [](int n) {return n; };).
 // Returns a LeastSq whose complexity is oLambda, whose coef is the fitted
 // coefficient and whose rms is the root mean squared error of the fit divided
 // by the mean of the observed times.
 // 'time' is indexed with the indices of 'n', so it must hold at least as many
 // elements. No guard is applied for an empty 'n' or for a curve that is zero
 // at every sample; the divisions below then produce non-finite values.
 // For a deeper explanation on the algorithm logic, look the README file at
 // http://github.com/ismaelJimenez/Minimal-Cpp-Least-Squared-Fit
 LeastSq MinimalLeastSq(const std::vector<int>& n,
                        const std::vector<double>& time,
                        BigOFunc* fitting_curve) {
   const size_t count = n.size();
   double sigma_gn_squared = 0.0;
   double sigma_time = 0.0;
   double sigma_time_gn = 0.0;
   // Accumulate the sums needed for the least square fitting parameter.
   for (size_t i = 0; i < count; ++i) {
     const double gn_i = fitting_curve(n[i]);
     sigma_gn_squared += gn_i * gn_i;
     sigma_time += time[i];
     sigma_time_gn += time[i] * gn_i;
   }
   LeastSq result;
   result.complexity = oLambda;
   // Coefficient that minimizes sum((time - coef * g(n))^2).
   result.coef = sigma_time_gn / sigma_gn_squared;
   // Sum of the squared residuals of the fit.
   double squared_error = 0.0;
   for (size_t i = 0; i < count; ++i) {
     const double fit = result.coef * fitting_curve(n[i]);
     squared_error += std::pow(time[i] - fit, 2);
   }
   // Normalize the RMS by the mean of the observed values.
   const double mean = sigma_time / count;
   result.rms = std::sqrt(squared_error / count) / mean;
   return result;
 }
 // Fits the measured times against a complexity class and returns the result
 // of the fit.
 //   - n          : Vector containing the size of the benchmark tests.
 //   - time       : Vector containing the times for the benchmark tests.
 //   - complexity : Any value other than oAuto selects the fitting curve
 //                  directly. With oAuto every built-in curve is tried and the
 //                  one with the smallest rms is returned.
 // The returned LeastSq carries the complexity class that was actually used.
 LeastSq MinimalLeastSq(const std::vector<int>& n,
                        const std::vector<double>& time,
                        const BigO complexity) {
   CHECK_EQ(n.size(), time.size());
   // A fitting curve is not computed from fewer than two benchmark runs.
   CHECK_GE(n.size(), 2);
   CHECK_NE(complexity, oNone);
   // Explicit request: fit that single curve.
   if (complexity != oAuto) {
     LeastSq requested_fit = MinimalLeastSq(n, time, FittingCurve(complexity));
     requested_fit.complexity = complexity;
     return requested_fit;
   }
   // Automatic selection: start from the constant curve ...
   LeastSq best_fit = MinimalLeastSq(n, time, FittingCurve(o1));
   best_fit.complexity = o1;
   // ... and replace it whenever another curve has a strictly smaller error.
   const BigO candidates[] = {oLogN, oN, oNLogN, oNSquared, oNCubed};
   for (const BigO candidate : candidates) {
     LeastSq candidate_fit = MinimalLeastSq(n, time, FittingCurve(candidate));
     if (candidate_fit.rms < best_fit.rms) {
       best_fit = candidate_fit;
       best_fit.complexity = candidate;
     }
   }
   return best_fit;
 }
 // Aggregates the repetitions of one benchmark into two synthetic runs named
 // "<name>_mean" and "<name>_stddev", returned in that order. Runs that
 // reported an error are left out of the statistics. An empty vector is
 // returned when fewer than two runs completed without an error.
 std::vector<BenchmarkReporter::Run> ComputeStats(
     const std::vector<BenchmarkReporter::Run>& reports) {
   typedef BenchmarkReporter::Run Run;
   std::vector<Run> aggregates;
   // Nothing is aggregated unless at least two runs succeeded.
   auto failed_runs =
       std::count_if(reports.begin(), reports.end(),
                     [](Run const& r) { return r.error_occurred; });
   if (reports.size() - failed_runs < 2) return aggregates;
   // One accumulator per reported quantity.
   Stat1_d real_time_stat;
   Stat1_d cpu_time_stat;
   Stat1_d bytes_stat;
   Stat1_d items_stat;
   // All repetitions should be run with the same number of iterations, so the
   // count is taken from the first report.
   int64_t const iterations = reports.front().iterations;
   for (auto it = reports.begin(); it != reports.end(); ++it) {
     Run const& run = *it;
     CHECK_EQ(reports[0].benchmark_name, run.benchmark_name);
     CHECK_EQ(iterations, run.iterations);
     if (run.error_occurred) continue;
     // Times are accumulated per iteration, weighted by the iteration count.
     real_time_stat +=
         Stat1_d(run.real_accumulated_time / run.iterations, run.iterations);
     cpu_time_stat +=
         Stat1_d(run.cpu_accumulated_time / run.iterations, run.iterations);
     items_stat += Stat1_d(run.items_per_second, run.iterations);
     bytes_stat += Stat1_d(run.bytes_per_second, run.iterations);
   }
   // Mean entry: per-iteration means scaled back to accumulated times.
   Run mean_run;
   mean_run.benchmark_name = reports[0].benchmark_name + "_mean";
   mean_run.iterations = iterations;
   mean_run.real_accumulated_time = real_time_stat.Mean() * iterations;
   mean_run.cpu_accumulated_time = cpu_time_stat.Mean() * iterations;
   mean_run.bytes_per_second = bytes_stat.Mean();
   mean_run.items_per_second = items_stat.Mean();
   // The label is only carried over when every report has the same one.
   const bool shared_label =
       std::all_of(reports.begin() + 1, reports.end(), [&reports](Run const& r) {
         return !(r.report_label != reports[0].report_label);
       });
   if (shared_label) {
     mean_run.report_label = reports[0].report_label;
   } else {
     mean_run.report_label = "";
   }
   // Standard deviation entry; it reuses the label chosen for the mean.
   Run stddev_run;
   stddev_run.benchmark_name = reports[0].benchmark_name + "_stddev";
   stddev_run.report_label = mean_run.report_label;
   stddev_run.iterations = 0;
   stddev_run.real_accumulated_time = real_time_stat.StdDev();
   stddev_run.cpu_accumulated_time = cpu_time_stat.StdDev();
   stddev_run.bytes_per_second = bytes_stat.StdDev();
   stddev_run.items_per_second = items_stat.StdDev();
   aggregates.push_back(mean_run);
   aggregates.push_back(stddev_run);
   return aggregates;
 }
 // Fits the runs of one benchmark family against a complexity curve and
 // returns two synthetic runs, "<family>_BigO" (fitted coefficients) followed
 // by "<family>_RMS" (normalized fitting errors). An empty vector is returned
 // when fewer than two reports are given. The family name is the benchmark
 // name of the first report up to, but not including, its first '/'.
 std::vector<BenchmarkReporter::Run> ComputeBigO(
     const std::vector<BenchmarkReporter::Run>& reports) {
   typedef BenchmarkReporter::Run Run;
   std::vector<Run> results;
   if (reports.size() < 2) return results;
   // Accumulators.
   std::vector<int> n;
   std::vector<double> real_time;
   std::vector<double> cpu_time;
   // Populate the accumulators with the problem size and the per-iteration
   // times of every run.
   for (const Run& run : reports) {
     CHECK_GT(run.complexity_n, 0) << "Did you forget to call SetComplexityN?";
     n.push_back(run.complexity_n);
     real_time.push_back(run.real_accumulated_time / run.iterations);
     cpu_time.push_back(run.cpu_accumulated_time / run.iterations);
   }
   LeastSq result_cpu;
   LeastSq result_real;
   // The complexity settings are taken from the first report only.
   if (reports[0].complexity == oLambda) {
     // User-supplied curve: fit CPU and real time against the same lambda.
     result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity_lambda);
     result_real = MinimalLeastSq(n, real_time, reports[0].complexity_lambda);
   } else {
     // Built-in curve: the CPU time fit is computed first, and the real time
     // is then fitted against the complexity class that fit reported, so both
     // results refer to the same curve.
     result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity);
     result_real = MinimalLeastSq(n, real_time, result_cpu.complexity);
   }
   std::string benchmark_name =
       reports[0].benchmark_name.substr(0, reports[0].benchmark_name.find('/'));
   // Get the data from the accumulator to BenchmarkReporter::Run's.
   Run big_o;
   big_o.benchmark_name = benchmark_name + "_BigO";
   big_o.iterations = 0;
   big_o.real_accumulated_time = result_real.coef;
   big_o.cpu_accumulated_time = result_cpu.coef;
   big_o.report_big_o = true;
   big_o.complexity = result_cpu.complexity;
   double multiplier = GetTimeUnitMultiplier(reports[0].time_unit);
   // Both entries take the label of the first report; unlike ComputeStats, the
   // labels of the remaining reports are not compared.
   Run rms;
   big_o.report_label = reports[0].report_label;
   rms.benchmark_name = benchmark_name + "_RMS";
   rms.report_label = big_o.report_label;
   rms.iterations = 0;
   // The RMS values are divided by the time unit multiplier of the first
   // report (presumably to cancel the unit scaling applied when the value is
   // printed -- confirm against the reporters).
   rms.real_accumulated_time = result_real.rms / multiplier;
   rms.cpu_accumulated_time = result_cpu.rms / multiplier;
   rms.report_rms = true;
   rms.complexity = result_cpu.complexity;
   results.push_back(big_o);
   results.push_back(rms);
   return results;
 }
 }  // end namespace benchmark
--- a/utils/google-benchmark/src/complexity.h
+++ b/utils/google-benchmark/src/complexity.h
@@ -0,0 +1,64 @@
 // Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 // Source project : https://github.com/ismaelJimenez/cpp.leastsq
 // Adapted to be used with google benchmark
 #ifndef COMPLEXITY_H_
 #define COMPLEXITY_H_
 #include <string>
 #include <vector>
 #include "benchmark/benchmark_api.h"
 #include "benchmark/reporter.h"
 namespace benchmark {
 // Return a vector containing the mean and standard deviation information for
 // the specified list of reports. If 'reports' contains fewer than two
 // non-errored runs an empty vector is returned.
 std::vector<BenchmarkReporter::Run> ComputeStats(
     const std::vector<BenchmarkReporter::Run>& reports);
 // Return a vector containing the bigO and RMS information for the specified
 // list of reports. If 'reports.size() < 2' an empty vector is returned.
 std::vector<BenchmarkReporter::Run> ComputeBigO(
     const std::vector<BenchmarkReporter::Run>& reports);
 // This data structure will contain the result returned by MinimalLeastSq
 //   - coef        : Estimated coefficient for the high-order term as
 //                   interpolated from data.
 //   - rms         : Normalized Root Mean Squared Error.
 //   - complexity  : Scalability form (e.g. oN, oNLogN). In case a scalability
 //                   form has been provided to MinimalLeastSq this will return
 //                   the same value. In case BigO::oAuto has been selected, this
 //                   parameter will return the best fitting curve detected.
 struct LeastSq {
  // A default-constructed result holds zero values and no complexity (oNone).
  LeastSq() :
    coef(0.0),
    rms(0.0),
    complexity(oNone) {}
  double coef;
  double rms;
  BigO complexity;
 };
 // Function to return a string for the calculated complexity.
 std::string GetBigOString(BigO complexity);
 } // end namespace benchmark
 #endif // COMPLEXITY_H_
--- a/utils/google-benchmark/src/console_reporter.cc
+++ b/utils/google-benchmark/src/console_reporter.cc
@@ -0,0 +1,124 @@
 // Copyright 2015 Google Inc. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "benchmark/reporter.h"
 #include "complexity.h"
 #include <algorithm>
 #include <cstdint>
 #include <cstdio>
 #include <iostream>
 #include <string>
 #include <tuple>
 #include <vector>
 #include "check.h"
 #include "colorprint.h"
 #include "commandlineflags.h"
 #include "internal_macros.h"
 #include "string_util.h"
 #include "walltime.h"
 DECLARE_bool(color_print);
 namespace benchmark {
 // Prints the basic run context to the error stream, then the column header
 // followed by a dashed rule to the output stream. Always returns true.
 bool ConsoleReporter::ReportContext(const Context& context) {
   // Stored so PrintRunData can pad benchmark names to the same width.
   name_field_width_ = context.name_field_width;
   PrintBasicContext(&GetErrorStream(), context);
 #ifdef BENCHMARK_OS_WINDOWS
   if (FLAGS_color_print) {
     if (&std::cout != &GetOutputStream()) {
       GetErrorStream() << "Color printing is only supported for stdout on windows."
                           " Disabling color printing\n";
       FLAGS_color_print = false;
     }
   }
 #endif
   const int name_width = static_cast<int>(name_field_width_);
   const std::string header = FormatString("%-*s %13s %13s %10s\n", name_width,
                                           "Benchmark", "Time", "CPU",
                                           "Iterations");
   // The header ends in '\n', so the rule is one character shorter.
   const std::string rule(header.length() - 1, '-');
   GetOutputStream() << header << rule << "\n";
   return true;
 }
 void ConsoleReporter::ReportRuns(const std::vector<Run>& reports) {
  for (const auto& run : reports)
    PrintRunData(run);
 }
 // Prints one result row to the output stream: the padded benchmark name, then
 // either an error message, or the real/CPU time columns followed by the
 // optional iteration count, byte rate, item rate and label.
 void ConsoleReporter::PrintRunData(const Run& result) {
   auto& Out = GetOutputStream();
   auto name_color =
       (result.report_big_o || result.report_rms) ? COLOR_BLUE : COLOR_GREEN;
   // A '*' width in a printf-style format consumes an int, so the stored width
   // is converted explicitly (ReportContext does the same for the header).
   ColorPrintf(Out, name_color, "%-*s ", static_cast<int>(name_field_width_),
               result.benchmark_name.c_str());
   if (result.error_occurred) {
     ColorPrintf(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'",
                 result.error_message.c_str());
     ColorPrintf(Out, COLOR_DEFAULT, "\n");
     return;
   }
   // Format bytes per second
   std::string rate;
   if (result.bytes_per_second > 0) {
     rate = StrCat(" ", HumanReadableNumber(result.bytes_per_second), "B/s");
   }
   // Format items per second
   std::string items;
   if (result.items_per_second > 0) {
     items = StrCat(" ", HumanReadableNumber(result.items_per_second),
                    " items/s");
   }
   const double real_time = result.GetAdjustedRealTime();
   const double cpu_time = result.GetAdjustedCPUTime();
   if (result.report_big_o) {
     // Big-O rows show the coefficient followed by the complexity string.
     std::string big_o = GetBigOString(result.complexity);
     ColorPrintf(Out, COLOR_YELLOW, "%10.2f %s %10.2f %s ", real_time,
                 big_o.c_str(), cpu_time, big_o.c_str());
   } else if (result.report_rms) {
     // RMS rows are shown as percentages.
     ColorPrintf(Out, COLOR_YELLOW, "%10.0f %% %10.0f %% ", real_time * 100,
                 cpu_time * 100);
   } else {
     const char* timeLabel = GetTimeUnitString(result.time_unit);
     ColorPrintf(Out, COLOR_YELLOW, "%10.0f %s %10.0f %s ", real_time, timeLabel,
                 cpu_time, timeLabel);
   }
   if (!result.report_big_o && !result.report_rms) {
     // "%lld" consumes a long long; cast so the variadic argument matches the
     // format on platforms where the iteration count is a different type.
     ColorPrintf(Out, COLOR_CYAN, "%10lld",
                 static_cast<long long>(result.iterations));
   }
   if (!rate.empty()) {
     ColorPrintf(Out, COLOR_DEFAULT, " %*s", 13, rate.c_str());
   }
   if (!items.empty()) {
     ColorPrintf(Out, COLOR_DEFAULT, " %*s", 18, items.c_str());
   }
   if (!result.report_label.empty()) {
     ColorPrintf(Out, COLOR_DEFAULT, " %s", result.report_label.c_str());
   }
   ColorPrintf(Out, COLOR_DEFAULT, "\n");
 }
 }  // end namespace benchmark
--- a/utils/google-benchmark/src/csv_reporter.cc
+++ b/utils/google-benchmark/src/csv_reporter.cc
@@ -0,0 +1,118 @@
 // Copyright 2015 Google Inc. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "benchmark/reporter.h"
 #include "complexity.h"
 #include <algorithm>
 #include <cstdint>
 #include <iostream>
 #include <string>
 #include <tuple>
 #include <vector>
 #include "string_util.h"
 #include "walltime.h"
 // File format reference: http://edoceo.com/utilitas/csv-file-format.
 namespace benchmark {
 namespace {
 // Column names of the CSV header, in output order. The reporter's row printer
 // counts separators relative to elements.size(), so the order here must match
 // the order in which it writes fields.
 std::vector<std::string> elements = {
   "name", "iterations", "real_time", "cpu_time", "time_unit",
   "bytes_per_second", "items_per_second", "label",
   "error_occurred", "error_message"
 };
 }  // end namespace
 // Writes the basic context to the error stream and the comma-separated column
 // header line to the output stream. Always returns true.
 bool CSVReporter::ReportContext(const Context& context) {
   PrintBasicContext(&GetErrorStream(), context);
   std::ostream& Out = GetOutputStream();
   const std::vector<std::string>::size_type column_count = elements.size();
   for (std::vector<std::string>::size_type i = 0; i < column_count; ++i) {
     // A separator goes between columns, never before the first one.
     if (i != 0) {
       Out << ",";
     }
     Out << elements[i];
   }
   Out << "\n";
   return true;
 }
 void CSVReporter::ReportRuns(const std::vector<Run> & reports) {
  for (const auto& run : reports)
    PrintRunData(run);
 }
 // Writes a single CSV row for 'run' to the output stream. Text fields are
 // wrapped in double quotes with embedded double quotes doubled. An errored run
 // only fills the name, error_occurred and error_message columns.
 void CSVReporter::PrintRunData(const Run & run) {
   std::ostream& Out = GetOutputStream();
   // Quote the benchmark name: embedded '"' becomes '""'.
   std::string quoted_name = run.benchmark_name;
   ReplaceAll(&quoted_name, "\"", "\"\"");
   Out << '"' << quoted_name << "\",";
   if (run.error_occurred) {
     // Emit only separators for the columns between name and error_occurred.
     const std::string skipped_columns(elements.size() - 3, ',');
     std::string quoted_msg = run.error_message;
     ReplaceAll(&quoted_msg, "\"", "\"\"");
     Out << skipped_columns;
     Out << "true,";
     Out << '"' << quoted_msg << "\"\n";
     return;
   }
   // The iterations column is left empty on bigO and RMS rows.
   const bool omit_iterations = run.report_big_o || run.report_rms;
   if (!omit_iterations) {
     Out << run.iterations;
   }
   Out << ",";
   Out << run.GetAdjustedRealTime() << ",";
   Out << run.GetAdjustedCPUTime() << ",";
   // The time_unit column holds the complexity string on bigO rows and is
   // left empty on RMS rows.
   if (run.report_big_o) {
     Out << GetBigOString(run.complexity);
   } else if (!run.report_rms) {
     Out << GetTimeUnitString(run.time_unit);
   }
   Out << ",";
   if (run.bytes_per_second > 0.0) {
     Out << run.bytes_per_second;
   }
   Out << ",";
   if (run.items_per_second > 0.0) {
     Out << run.items_per_second;
   }
   Out << ",";
   if (!run.report_label.empty()) {
     // Same quoting rule as for the name.
     std::string quoted_label = run.report_label;
     ReplaceAll(&quoted_label, "\"", "\"\"");
     Out << "\"" << quoted_label << "\"";
   }
   // Empty error_occurred and error_message columns, then end of row.
   Out << ",,";
   Out << '\n';
 }
 }  // end namespace benchmark
--- a/utils/google-benchmark/src/cycleclock.h
+++ b/utils/google-benchmark/src/cycleclock.h
@@ -0,0 +1,145 @@
 // ----------------------------------------------------------------------
 // CycleClock
 //    A CycleClock tells you the current time in Cycles.  The "time"
 //    is actually time since power-on.  This is like time() but doesn't
 //    involve a system call and is much more precise.
 //
 // NOTE: Not all cpu/platform/kernel combinations guarantee that this
 // clock increments at a constant rate or is synchronized across all logical
 // cpus in a system.
 //
 // If you need the above guarantees, please consider using a different
 // API. There are efforts to provide an interface which provides a millisecond
 // granularity and implemented as a memory read. A memory read is generally
 // cheaper than the CycleClock for many architectures.
 //
 // Also, in some out of order CPU implementations, the CycleClock is not
 // serializing. So if you're trying to count at cycles granularity, your
 // data might be inaccurate due to out of order instruction execution.
 // ----------------------------------------------------------------------
 #ifndef BENCHMARK_CYCLECLOCK_H_
 #define BENCHMARK_CYCLECLOCK_H_
 #include <cstdint>
 #include "benchmark/macros.h"
 #include "internal_macros.h"
 #if defined(BENCHMARK_OS_MACOSX)
 #include <mach/mach_time.h>
 #endif
 // For MSVC, we want to use '_asm rdtsc' when possible (since it works
 // with even ancient MSVC compilers), and when not possible the
 // __rdtsc intrinsic, declared in <intrin.h>.  Unfortunately, in some
 // environments, <windows.h> and <intrin.h> have conflicting
 // declarations of some other intrinsics, breaking compilation.
 // Therefore, we simply declare __rdtsc ourselves. See also
 // http://connect.microsoft.com/VisualStudio/feedback/details/262047
 #if defined(COMPILER_MSVC) && !defined(_M_IX86)
 extern "C" uint64_t __rdtsc();
 #pragma intrinsic(__rdtsc)
 #endif
 #ifndef BENCHMARK_OS_WINDOWS
 #include <sys/time.h>
 #endif
 namespace benchmark {
 // NOTE: only i386 and x86_64 have been well tested.
 // PPC, sparc, alpha, and ia64 are based on
 //    http://peter.kuscsik.com/wordpress/?p=14
 // with modifications by m3b.  See also
 //    https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h
 namespace cycleclock {
 // This should return the number of cycles since power-on.  Thread-safe.
 // The implementation is chosen at compile time per OS/architecture; several
 // branches return a timer or microsecond value rather than true CPU cycles
 // (see the per-branch comments).
 inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
 #if defined(BENCHMARK_OS_MACOSX)
   // this goes at the top because we need ALL Macs, regardless of
   // architecture, to return the number of "mach time units" that
   // have passed since startup.  See sysinfo.cc where
   // InitializeSystemInfo() sets the supposed cpu clock frequency of
   // macs to the number of mach time units per second, not actual
   // CPU clock frequency (which can change in the face of CPU
   // frequency scaling).  Also note that when the Mac sleeps, this
   // counter pauses; it does not continue counting, nor does it
   // reset to zero.
   return mach_absolute_time();
 #elif defined(__i386__)
   int64_t ret;
   __asm__ volatile("rdtsc" : "=A"(ret));
   return ret;
 #elif defined(__x86_64__) || defined(__amd64__)
   uint64_t low, high;
   __asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
   return (high << 32) | low;
 #elif defined(__powerpc__) || defined(__ppc__)
   // This returns a time-base, which is not always precisely a cycle-count.
   int64_t tbl, tbu0, tbu1;
   asm("mftbu %0" : "=r"(tbu0));
   asm("mftb  %0" : "=r"(tbl));
   asm("mftbu %0" : "=r"(tbu1));
   // If the upper half changed between the two reads, the lower half wrapped;
   // the mask zeroes tbl in that case. (The cast must name int64_t: a plain
   // 'int64' type is not declared anywhere and fails to compile.)
   tbl &= -static_cast<int64_t>(tbu0 == tbu1);
   // high 32 bits in tbu1; low 32 bits in tbl  (tbu0 is garbage)
   return (tbu1 << 32) | tbl;
 #elif defined(__sparc__)
   int64_t tick;
   asm(".byte 0x83, 0x41, 0x00, 0x00");
   asm("mov   %%g1, %0" : "=r"(tick));
   return tick;
 #elif defined(__ia64__)
   int64_t itc;
   asm("mov %0 = ar.itc" : "=r"(itc));
   return itc;
 #elif defined(COMPILER_MSVC) && defined(_M_IX86)
   // Older MSVC compilers (like 7.x) don't seem to support the
   // __rdtsc intrinsic properly, so I prefer to use _asm instead
   // when I know it will work.  Otherwise, I'll use __rdtsc and hope
   // the code is being compiled with a non-ancient compiler.
   _asm rdtsc
 #elif defined(COMPILER_MSVC)
   return __rdtsc();
 #elif defined(__aarch64__)
   // System timer of ARMv8 runs at a different frequency than the CPU's.
   // The frequency is fixed, typically in the range 1-50MHz.  It can be
   // read at CNTFRQ special register.  We assume the OS has set up
   // the virtual timer properly.
   int64_t virtual_timer_value;
   asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
   return virtual_timer_value;
 #elif defined(__ARM_ARCH)
 #if (__ARM_ARCH >= 6)  // V6 is the earliest arch that has a standard cyclecount
   uint32_t pmccntr;
   uint32_t pmuseren;
   uint32_t pmcntenset;
   // Read the user mode perf monitor counter access permissions.
   asm("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren));
   if (pmuseren & 1) {  // Allows reading perfmon counters for user mode code.
     asm("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset));
     if (pmcntenset & 0x80000000ul) {  // Is it counting?
       asm("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr));
       // The counter is set up to count every 64th cycle
       return static_cast<int64_t>(pmccntr) * 64;  // Should optimize to << 6
     }
   }
 #endif
   // Fallback when the perf counter is unavailable: wall-clock microseconds.
   struct timeval tv;
   gettimeofday(&tv, nullptr);
   return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
 #elif defined(__mips__)
   // mips apparently only allows rdtsc for superusers, so we fall
   // back to gettimeofday.  It's possible clock_gettime would be better.
   struct timeval tv;
   gettimeofday(&tv, nullptr);
   return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
 #else
 // The soft failover to a generic implementation is automatic only for ARM.
 // For other platforms the developer is expected to make an attempt to create
 // a fast implementation and use generic version if nothing better is available.
 #error You need to define CycleTimer for your OS and CPU
 #endif
 }
 }  // end namespace cycleclock
 }  // end namespace benchmark
 #endif  // BENCHMARK_CYCLECLOCK_H_
--- a/utils/google-benchmark/src/internal_macros.h
+++ b/utils/google-benchmark/src/internal_macros.h
@@ -0,0 +1,40 @@
 #ifndef BENCHMARK_INTERNAL_MACROS_H_
 #define BENCHMARK_INTERNAL_MACROS_H_
 #include "benchmark/macros.h"
 // Fallback so that __has_feature(...) can be used in #if below even when the
 // compiler does not define it; every query then evaluates to 0.
 #ifndef __has_feature
 # define __has_feature(x) 0
 #endif
 // BENCHMARK_NORETURN: the C++11 attribute when cxx_attributes is reported,
 // otherwise the GNU attribute, otherwise nothing.
 #if __has_feature(cxx_attributes)
 # define BENCHMARK_NORETURN [[noreturn]]
 #elif defined(__GNUC__)
 # define BENCHMARK_NORETURN __attribute__((noreturn))
 #else
 # define BENCHMARK_NORETURN
 #endif
 // Operating system detection. At most one BENCHMARK_OS_* macro is defined.
 // __CYGWIN__ is tested before _WIN32, so a build that defines both is
 // classified as Cygwin rather than Windows.
 #if defined(__CYGWIN__)
 # define BENCHMARK_OS_CYGWIN 1
 #elif defined(_WIN32)
 # define BENCHMARK_OS_WINDOWS 1
 #elif defined(__APPLE__)
 // TODO(ericwf) This doesn't actually check that it is a Mac OSX system. Just
 // that it is an apple system.
 # define BENCHMARK_OS_MACOSX 1
 #elif defined(__FreeBSD__)
 # define BENCHMARK_OS_FREEBSD 1
 #elif defined(__linux__)
 # define BENCHMARK_OS_LINUX 1
 #endif
 // Compiler detection. At most one COMPILER_* macro is defined; __clang__ is
 // tested first, so it wins when several of these macros are present.
 #if defined(__clang__)
 # define COMPILER_CLANG
 #elif defined(_MSC_VER)
 # define COMPILER_MSVC
 #elif defined(__GNUC__)
 # define COMPILER_GCC
 #endif
 #endif // BENCHMARK_INTERNAL_MACROS_H_
--- a/utils/google-benchmark/src/json_reporter.cc
+++ b/utils/google-benchmark/src/json_reporter.cc
@@ -0,0 +1,178 @@
 // Copyright 2015 Google Inc. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "benchmark/reporter.h"
 #include "complexity.h"
 #include <algorithm>
 #include <cstdint>
 #include <iostream>
 #include <string>
 #include <tuple>
 #include <vector>
 #include "string_util.h"
 #include "walltime.h"
 namespace benchmark {
 namespace {
 // Escapes 'value' for use inside a JSON string literal: '"' and '\' get a
 // backslash, common control characters use their short escapes, and any other
 // character below 0x20 is written as \u00XX.
 std::string EscapeJSONString(std::string const& value) {
   static const char kHexDigits[] = "0123456789abcdef";
   std::string escaped;
   escaped.reserve(value.size());
   for (std::string::size_type i = 0; i < value.size(); ++i) {
     const unsigned char c = static_cast<unsigned char>(value[i]);
     switch (c) {
       case '"':  escaped += "\\\""; break;
       case '\\': escaped += "\\\\"; break;
       case '\b': escaped += "\\b"; break;
       case '\f': escaped += "\\f"; break;
       case '\n': escaped += "\\n"; break;
       case '\r': escaped += "\\r"; break;
       case '\t': escaped += "\\t"; break;
       default:
         if (c < 0x20) {
           escaped += "\\u00";
           escaped += kHexDigits[c >> 4];
           escaped += kHexDigits[c & 0x0F];
         } else {
           escaped += static_cast<char>(c);
         }
         break;
     }
   }
   return escaped;
 }
 // Formats a JSON member `"key": "value"` with a string value. The value is
 // escaped so that names, labels and error messages containing quotes,
 // backslashes or control characters still produce valid JSON. The key is
 // written as given.
 std::string FormatKV(std::string const& key, std::string const& value) {
   return "\"" + key + "\": \"" + EscapeJSONString(value) + "\"";
 }
 // Overload for C strings; 'value' must not be null.
 std::string FormatKV(std::string const& key, const char* value) {
   return FormatKV(key, std::string(value));
 }
 // Formats a JSON member `"key": true` or `"key": false` (unquoted literal).
 std::string FormatKV(std::string const& key, bool value) {
   const char* literal = "false";
   if (value) {
     literal = "true";
   }
   return StringPrintF("\"%s\": %s", key.c_str(), literal);
 }
 // Formats a JSON member `"key": <integer>`. std::to_string is used instead of
 // a string stream so the digits never pick up locale-specific grouping and
 // the function only depends on <string>.
 std::string FormatKV(std::string const& key, int64_t value) {
   return "\"" + key + "\": " + std::to_string(value);
 }
 // Adds 0.5 and truncates toward zero, which rounds non-negative values to
 // the nearest integer.
 int64_t RoundDouble(double v) {
   const double shifted = v + 0.5;
   return static_cast<int64_t>(shifted);
 }
 } // end namespace
 // Opens the top-level JSON object, writes the "context" member (date, CPU
 // count, MHz per CPU, CPU scaling flag, library build type) and opens the
 // "benchmarks" array. Always returns true.
 bool JSONReporter::ReportContext(const Context& context) {
   std::ostream& out = GetOutputStream();
   out << "{\n";
   const std::string outer_pad(2, ' ');
   // Open context block and print context information.
   out << outer_pad << "\"context\": {\n";
   const std::string inner_pad(4, ' ');
   const std::string date = LocalDateTimeString();
   out << inner_pad << FormatKV("date", date) << ",\n";
   const int64_t cpu_count = static_cast<int64_t>(context.num_cpus);
   out << inner_pad << FormatKV("num_cpus", cpu_count) << ",\n";
   const int64_t mhz = RoundDouble(context.mhz_per_cpu);
   out << inner_pad << FormatKV("mhz_per_cpu", mhz) << ",\n";
   out << inner_pad
       << FormatKV("cpu_scaling_enabled", context.cpu_scaling_enabled)
       << ",\n";
   // The build type reflects how this translation unit was compiled.
 #ifdef NDEBUG
   const char build_type[] = "release";
 #else
   const char build_type[] = "debug";
 #endif
   // Last member of the context object: no trailing comma.
   out << inner_pad << FormatKV("library_build_type", build_type) << "\n";
   // Close context block and open the list of benchmarks.
   out << outer_pad << "},\n";
   out << outer_pad << "\"benchmarks\": [\n";
   return true;
 }
 void JSONReporter::ReportRuns(std::vector<Run> const& reports) {
  if (reports.empty()) {
    return;
  }
  std::string indent(4, ' ');
  std::ostream& out = GetOutputStream();
  if (!first_report_) {
    out << ",\n";
  }
  first_report_ = false;
  for (auto it = reports.begin(); it != reports.end(); ++it) {
    out << indent << "{\n";
    PrintRunData(*it);
    out << indent << '}';
    auto it_cp = it;
    if (++it_cp != reports.end()) {
      out << ",\n";
    }
  }
 }
 // Closes the "benchmarks" array and the top-level JSON object.
 void JSONReporter::Finalize() {
   std::ostream& out = GetOutputStream();
   out << "\n  ]\n}\n";
 }
 // Writes the members of one run's JSON object (without the surrounding
 // braces) to the output stream. Regular runs report iterations and times,
 // big-O runs report coefficients and the complexity string, RMS runs report
 // the RMS as an integer percentage. Optional rate and label members follow.
 void JSONReporter::PrintRunData(Run const& run) {
   std::string indent(6, ' ');
   std::ostream& out = GetOutputStream();
   out << indent << FormatKV("name", run.benchmark_name) << ",\n";
   if (run.error_occurred) {
     out << indent << FormatKV("error_occurred", run.error_occurred) << ",\n";
     out << indent << FormatKV("error_message", run.error_message) << ",\n";
   }
   // Each branch leaves the last member without a trailing separator; the
   // optional members below start with ",\n" instead.
   if (!run.report_big_o && !run.report_rms) {
     out << indent << FormatKV("iterations", run.iterations) << ",\n";
     out << indent
         << FormatKV("real_time", RoundDouble(run.GetAdjustedRealTime()))
         << ",\n";
     out << indent
         << FormatKV("cpu_time", RoundDouble(run.GetAdjustedCPUTime()));
     out << ",\n"
         << indent << FormatKV("time_unit", GetTimeUnitString(run.time_unit));
   } else if (run.report_big_o) {
     out << indent
         << FormatKV("cpu_coefficient", RoundDouble(run.GetAdjustedCPUTime()))
         << ",\n";
     out << indent
         << FormatKV("real_coefficient", RoundDouble(run.GetAdjustedRealTime()))
         << ",\n";
     out << indent << FormatKV("big_o", GetBigOString(run.complexity)) << ",\n";
     out << indent << FormatKV("time_unit", GetTimeUnitString(run.time_unit));
   } else if (run.report_rms) {
     // The value is a percentage. It is written as a bare number: a '%'
     // appended after an unquoted number is not valid JSON.
     out << indent
         << FormatKV("rms", RoundDouble(run.GetAdjustedCPUTime() * 100));
   }
   if (run.bytes_per_second > 0.0) {
     out << ",\n"
         << indent
         << FormatKV("bytes_per_second", RoundDouble(run.bytes_per_second));
   }
   if (run.items_per_second > 0.0) {
     out << ",\n"
         << indent
         << FormatKV("items_per_second", RoundDouble(run.items_per_second));
   }
   if (!run.report_label.empty()) {
     out << ",\n" << indent << FormatKV("label", run.report_label);
   }
   out << '\n';
 }
 }  // end namespace benchmark
--- a/utils/google-benchmark/src/log.cc
+++ b/utils/google-benchmark/src/log.cc
@@ -0,0 +1,40 @@
 #include "log.h"
 #include <iostream>
 namespace benchmark {
 namespace internal {
 // Returns a reference to the function-local log level, which starts at 0.
 int& LoggingLevelImp() {
   static int current_level = 0;
   return current_level;
 }
 // Stores 'value' as the current log level.
 void SetLogLevel(int value) {
   int& current_level = LoggingLevelImp();
   current_level = value;
 }
 // Returns the current log level.
 int GetLogLevel() {
   const int current_level = LoggingLevelImp();
   return current_level;
 }
 // Stream buffer that stores nothing: overflow() hands back the character it
 // was given without writing it anywhere.
 class NullLogBuffer : public std::streambuf {
  public:
   int overflow(int ch) override { return ch; }
 };
 std::ostream& GetNullLogInstance() {
  static NullLogBuffer log_buff;
  static std::ostream null_log(&log_buff);
  return null_log;
 }
 // Returns the stream used for log output that is not discarded: std::clog.
 std::ostream& GetErrorLogInstance() {
   std::ostream& log_stream = std::clog;
   return log_stream;
 }
 } // end namespace internal
 } // end namespace benchmark
--- a/utils/google-benchmark/src/log.h
+++ b/utils/google-benchmark/src/log.h
@@ -0,0 +1,28 @@
 #ifndef BENCHMARK_LOG_H_
 #define BENCHMARK_LOG_H_
 #include <ostream>
 namespace benchmark {
 namespace internal {
 int GetLogLevel();
 void SetLogLevel(int level);
 std::ostream& GetNullLogInstance();
 std::ostream& GetErrorLogInstance();
 // Selects the stream for a message of verbosity 'level': the error log when
 // 'level' does not exceed the current log level, the null log otherwise.
 inline std::ostream& GetLogInstanceForLevel(int level) {
   const bool enabled = (level <= GetLogLevel());
   return enabled ? GetErrorLogInstance() : GetNullLogInstance();
 }
 } // end namespace internal
 } // end namespace benchmark
 // VLOG(x) yields the stream chosen by GetLogInstanceForLevel(x), already
 // primed with the prefix "-- LOG(<x>): ", so callers can continue with
 // `VLOG(2) << "message\n";`. Note that 'x' is expanded twice.
 #define VLOG(x) (::benchmark::internal::GetLogInstanceForLevel(x) \
                 << "-- LOG(" << x << "): ")
 #endif
--- a/utils/google-benchmark/src/mutex.h
+++ b/utils/google-benchmark/src/mutex.h
@@ -0,0 +1,142 @@
 #ifndef BENCHMARK_MUTEX_H_
 #define BENCHMARK_MUTEX_H_
 #include <mutex>
 #include <condition_variable>
 // Enable thread safety attributes only with clang.
 // The attributes can be safely erased when compiling with other compilers.
 #if defined(HAVE_THREAD_SAFETY_ATTRIBUTES)
 #define THREAD_ANNOTATION_ATTRIBUTE__(x)   __attribute__((x))
 #else
 #define THREAD_ANNOTATION_ATTRIBUTE__(x)   // no-op
 #endif
 #define CAPABILITY(x) \
  THREAD_ANNOTATION_ATTRIBUTE__(capability(x))
 #define SCOPED_CAPABILITY \
  THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable)
 #define GUARDED_BY(x) \
  THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x))
 #define PT_GUARDED_BY(x) \
  THREAD_ANNOTATION_ATTRIBUTE__(pt_guarded_by(x))
 #define ACQUIRED_BEFORE(...) \
  THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(__VA_ARGS__))
 #define ACQUIRED_AFTER(...) \
  THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(__VA_ARGS__))
 #define REQUIRES(...) \
  THREAD_ANNOTATION_ATTRIBUTE__(requires_capability(__VA_ARGS__))
 #define REQUIRES_SHARED(...) \
  THREAD_ANNOTATION_ATTRIBUTE__(requires_shared_capability(__VA_ARGS__))
 #define ACQUIRE(...) \
  THREAD_ANNOTATION_ATTRIBUTE__(acquire_capability(__VA_ARGS__))
 #define ACQUIRE_SHARED(...) \
  THREAD_ANNOTATION_ATTRIBUTE__(acquire_shared_capability(__VA_ARGS__))
 #define RELEASE(...) \
  THREAD_ANNOTATION_ATTRIBUTE__(release_capability(__VA_ARGS__))
 #define RELEASE_SHARED(...) \
  THREAD_ANNOTATION_ATTRIBUTE__(release_shared_capability(__VA_ARGS__))
 #define TRY_ACQUIRE(...) \
  THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_capability(__VA_ARGS__))
 #define TRY_ACQUIRE_SHARED(...) \
  THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_shared_capability(__VA_ARGS__))
 #define EXCLUDES(...) \
  THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(__VA_ARGS__))
 #define ASSERT_CAPABILITY(x) \
  THREAD_ANNOTATION_ATTRIBUTE__(assert_capability(x))
 #define ASSERT_SHARED_CAPABILITY(x) \
  THREAD_ANNOTATION_ATTRIBUTE__(assert_shared_capability(x))
 #define RETURN_CAPABILITY(x) \
  THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x))
 #define NO_THREAD_SAFETY_ANALYSIS \
  THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis)
 namespace benchmark {
 typedef std::condition_variable Condition;
 // NOTE: Wrappers for std::mutex and std::unique_lock are provided so that
 // we can annotate them with thread safety attributes and use the
 // -Wthread-safety warning with clang. The standard library types cannot be
 // used directly because they do not provide the required annotations.
 class CAPABILITY("mutex") Mutex {
  public:
   Mutex() {}
   // lock()/unlock() forward to the wrapped std::mutex.
   void lock() ACQUIRE() {
     mut_.lock();
   }
   void unlock() RELEASE() {
     mut_.unlock();
   }
   // Gives access to the wrapped std::mutex (used by MutexLock).
   std::mutex& native_handle() { return mut_; }
  private:
   std::mutex mut_;
 };
 class SCOPED_CAPABILITY MutexLock
 {
  typedef std::unique_lock<std::mutex> MutexLockImp;
 public:
  MutexLock(Mutex& m) ACQUIRE(m) : ml_(m.native_handle())
  { }
  ~MutexLock() RELEASE() {}
  MutexLockImp& native_handle() { return ml_; }
 private:
  MutexLockImp ml_;
 };
 class Notification
 {
 public:
  Notification() : notified_yet_(false) { }
  void WaitForNotification() const EXCLUDES(mutex_) {
    MutexLock m_lock(mutex_);
    auto notified_fn = [this]() REQUIRES(mutex_) {
                            return this->HasBeenNotified();
                        };
    cv_.wait(m_lock.native_handle(), notified_fn);
  }
  void Notify() EXCLUDES(mutex_) {
    {
      MutexLock lock(mutex_);
      notified_yet_ = 1;
    }
    cv_.notify_all();
  }
 private:
  bool HasBeenNotified() const REQUIRES(mutex_) {
    return notified_yet_;
  }
  mutable Mutex mutex_;
  mutable std::condition_variable cv_;
  bool notified_yet_ GUARDED_BY(mutex_);
 };
 } // end namespace benchmark
 #endif // BENCHMARK_MUTEX_H_
--- a/utils/google-benchmark/src/re.h
+++ b/utils/google-benchmark/src/re.h
@@ -0,0 +1,60 @@
 // Copyright 2015 Google Inc. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifndef BENCHMARK_RE_H_
 #define BENCHMARK_RE_H_
 #if defined(HAVE_STD_REGEX)
 #include <regex>
 #elif defined(HAVE_GNU_POSIX_REGEX)
 #include <gnuregex.h>
 #elif defined(HAVE_POSIX_REGEX)
 #include <regex.h>
 #else
 #error No regular expression backend was found!
 #endif
 #include <string>
 namespace benchmark {
 // A wrapper around the regular expression backend selected at compile time
 // (std::regex when HAVE_STD_REGEX is defined, otherwise the POSIX regex_t
 // API) that provides automatic cleanup.
 // NOTE(review): no copy constructor or copy assignment is declared here; with
 // the regex_t backend, copying an initialized Regex looks like it would free
 // the same compiled pattern twice -- confirm that no caller copies this type.
 class Regex {
 public:
  Regex();
  ~Regex();
  // Compile a regular expression matcher from spec.  Returns true on success.
  //
  // On failure, returns false and (if error is not nullptr) populates error
  // with a human readable error message.
  bool Init(const std::string& spec, std::string* error);
  // Returns whether str matches the compiled regular expression.
  bool Match(const std::string& str);
 private:
  // True once Init() has succeeded.
  bool init_;
  // Underlying regular expression object
 #if defined(HAVE_STD_REGEX)
  std::regex re_;
 #elif defined(HAVE_POSIX_REGEX) || defined(HAVE_GNU_POSIX_REGEX)
  regex_t re_;
 #else
 # error No regular expression backend implementation available
 #endif
 };
 }  // end namespace benchmark
 #endif  // BENCHMARK_RE_H_
--- a/utils/google-benchmark/src/re_posix.cc
+++ b/utils/google-benchmark/src/re_posix.cc
@@ -0,0 +1,59 @@
 // Copyright 2015 Google Inc. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "check.h"
 #include "re.h"
 namespace benchmark {
 // Starts without a compiled pattern; Match() returns false until Init()
 // succeeds.
 Regex::Regex()
     : init_(false) {}
 // Compiles 'spec' as a POSIX extended regular expression (no sub-match
 // reporting). Returns true on success. On failure returns false and, if
 // 'error' is not null, stores the regerror() message in it.
 //
 // Calling Init() again replaces the previous pattern; after a failed call the
 // object holds no pattern and Match() returns false.
 bool Regex::Init(const std::string& spec, std::string* error) {
   // Release a previously compiled pattern first: regcomp() would otherwise
   // overwrite it and leak its storage.
   if (init_) {
     regfree(&re_);
     init_ = false;
   }
   int ec = regcomp(&re_, spec.c_str(), REG_EXTENDED | REG_NOSUB);
   if (ec != 0) {
     if (error) {
       // With a null buffer regerror() returns the size needed for the
       // message including its terminating NUL.
       size_t needed = regerror(ec, &re_, nullptr, 0);
       CHECK_NE(needed, 0);
       // A std::string owns the scratch buffer so nothing leaks if the
       // assignment below throws.
       std::string errbuf(needed, '\0');
       regerror(ec, &re_, &errbuf[0], needed);
       // Drop the terminating NUL when copying the message out.
       error->assign(errbuf.data(), needed - 1);
     }
     return false;
   }
   init_ = true;
   return true;
 }
 // Frees the compiled pattern, if Init() ever succeeded.
 Regex::~Regex() {
   if (!init_) {
     return;
   }
   regfree(&re_);
 }
 // Returns true when a pattern has been compiled and regexec() reports a
 // match for 'str'; returns false otherwise.
 bool Regex::Match(const std::string& str) {
   const bool matched =
       init_ && regexec(&re_, str.c_str(), 0, nullptr, 0) == 0;
   return matched;
 }
 }  // end namespace benchmark
--- a/utils/google-benchmark/src/re_std.cc
+++ b/utils/google-benchmark/src/re_std.cc
@@ -0,0 +1,44 @@
 // Copyright 2015 Google Inc. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "re.h"
 namespace benchmark {
 // Starts without a compiled pattern; Match() returns false until Init()
 // succeeds.
 Regex::Regex()
     : init_(false) {}
 // Compiles 'spec' with the extended grammar. Returns true on success. On
 // failure returns false and, if 'error' is not null, stores the exception
 // message in it.
 bool Regex::Init(const std::string& spec, std::string* error) {
   // Reset first so that a failed re-initialization reports failure instead
   // of returning the 'true' left behind by an earlier successful call.
   init_ = false;
   try {
     re_ = std::regex(spec, std::regex_constants::extended);
     init_ = true;
   } catch (const std::regex_error& e) {
     if (error) {
       *error = e.what();
     }
   }
   return init_;
 }
 // Nothing to release by hand in this backend.
 Regex::~Regex() {}
 // Returns true when a pattern has been compiled and it is found anywhere in
 // 'str' (regex_search); returns false otherwise.
 bool Regex::Match(const std::string& str) {
   const bool matched = init_ && std::regex_search(str, re_);
   return matched;
 }
 }  // end namespace benchmark
--- a/utils/google-benchmark/src/reporter.cc
+++ b/utils/google-benchmark/src/reporter.cc
@@ -0,0 +1,75 @@
 // Copyright 2015 Google Inc. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "benchmark/reporter.h"
 #include "walltime.h"
 #include <cstdlib>
 #include <iostream>
 #include <vector>
 #include <tuple>
 #include "check.h"
 #include "stat.h"
 namespace benchmark {
 // The output stream defaults to std::cout and the error stream to std::cerr.
 BenchmarkReporter::BenchmarkReporter()
     : output_stream_(&std::cout),
       error_stream_(&std::cerr) {}
 // Nothing to clean up here.
 BenchmarkReporter::~BenchmarkReporter() {}
 // Writes a short description of the machine and run to '*out_ptr': CPU count
 // and MHz, the local date/time, and warnings when CPU scaling is enabled or
 // the library was built without NDEBUG. 'out_ptr' must not be null.
 void BenchmarkReporter::PrintBasicContext(std::ostream *out_ptr,
                                           Context const &context) {
   CHECK(out_ptr) << "cannot be null";
   std::ostream& Out = *out_ptr;
   // NOTE(review): the suffix is printed after "CPU " (with a space), so
   // multi-CPU machines read "MHz CPU s" -- kept as-is.
   const char* plural_suffix = (context.num_cpus > 1) ? "s" : "";
   Out << "Run on (" << context.num_cpus << " X " << context.mhz_per_cpu
       << " MHz CPU " << plural_suffix << ")\n";
   Out << LocalDateTimeString() << "\n";
   if (context.cpu_scaling_enabled) {
     Out << "***WARNING*** CPU scaling is enabled, the benchmark "
            "real time measurements may be noisy and will incur extra "
            "overhead.\n";
   }
 #ifndef NDEBUG
   Out << "***WARNING*** Library was built as DEBUG. Timings may be "
          "affected.\n";
 #endif
 }
 // Accumulated real time scaled by the time-unit multiplier and, when the
 // iteration count is non-zero, divided by it.
 double BenchmarkReporter::Run::GetAdjustedRealTime() const {
   const double scaled =
       real_accumulated_time * GetTimeUnitMultiplier(time_unit);
   if (iterations == 0) {
     return scaled;
   }
   return scaled / static_cast<double>(iterations);
 }
 // Accumulated CPU time scaled by the time-unit multiplier and, when the
 // iteration count is non-zero, divided by it.
 double BenchmarkReporter::Run::GetAdjustedCPUTime() const {
   const double scaled =
       cpu_accumulated_time * GetTimeUnitMultiplier(time_unit);
   if (iterations == 0) {
     return scaled;
   }
   return scaled / static_cast<double>(iterations);
 }
 } // end namespace benchmark
--- a/utils/google-benchmark/src/sleep.cc
+++ b/utils/google-benchmark/src/sleep.cc
@@ -0,0 +1,50 @@
 // Copyright 2015 Google Inc. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "sleep.h"
 #include <cerrno>
 #include <ctime>
 #include "internal_macros.h"
 #ifdef BENCHMARK_OS_WINDOWS
 #include <Windows.h>
 #endif
 namespace benchmark {
 #ifdef BENCHMARK_OS_WINDOWS
 // The Windows Sleep() call takes its argument in milliseconds, so the value
 // is forwarded unchanged.
 void SleepForMilliseconds(int milliseconds) {
   Sleep(milliseconds);
 }
 // Sleeps for `seconds`, truncated to a whole number of milliseconds.
 void SleepForSeconds(double seconds) {
   const int whole_millis = static_cast<int>(kNumMillisPerSecond * seconds);
   SleepForMilliseconds(whole_millis);
 }
 #else   // BENCHMARK_OS_WINDOWS
 // Sleeps for `microseconds` using nanosleep(). When the call is interrupted
 // by a signal (EINTR) it is re-issued with the remaining time that nanosleep
 // wrote back; any other failure ends the wait.
 void SleepForMicroseconds(int microseconds) {
   struct timespec remaining;
   remaining.tv_sec = microseconds / kNumMicrosPerSecond;
   remaining.tv_nsec =
       (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro;
   for (;;) {
     if (nanosleep(&remaining, &remaining) == 0) break;  // slept in full
     if (errno != EINTR) break;                          // not a signal
   }
 }
 // Sleeps for `milliseconds` by converting to microseconds.
 // NOTE(review): the product is narrowed back to int by the callee's
 // parameter, so very large millisecond counts would overflow.
 void SleepForMilliseconds(int milliseconds) {
   const int64_t micros = static_cast<int>(milliseconds) * kNumMicrosPerMilli;
   SleepForMicroseconds(micros);
 }
 // Sleeps for `seconds`, truncated to a whole number of microseconds.
 void SleepForSeconds(double seconds) {
   const int whole_micros = static_cast<int>(seconds * kNumMicrosPerSecond);
   SleepForMicroseconds(whole_micros);
 }
 #endif  // BENCHMARK_OS_WINDOWS
 }  // end namespace benchmark
--- a/utils/google-benchmark/src/sleep.h
+++ b/utils/google-benchmark/src/sleep.h
@@ -0,0 +1,17 @@
 #ifndef BENCHMARK_SLEEP_H_
 #define BENCHMARK_SLEEP_H_
 #include <cstdint>
 namespace benchmark {
 // Unit-conversion factors shared by the sleep helpers. Each constant is the
 // number of the smaller unit contained in one of the larger unit.
 const int64_t kNumMillisPerSecond = 1000LL;
 const int64_t kNumMicrosPerMilli = 1000LL;
 const int64_t kNumMicrosPerSecond = kNumMillisPerSecond * 1000LL;
 const int64_t kNumNanosPerMicro = 1000LL;
 const int64_t kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond;
 // Blocks the calling thread for `milliseconds` milliseconds.
 void SleepForMilliseconds(int milliseconds);
 // Blocks the calling thread for `seconds` seconds. The fractional value is
 // truncated to the platform implementation's resolution (whole milliseconds
 // on Windows, whole microseconds elsewhere).
 void SleepForSeconds(double seconds);
 }  // end namespace benchmark
 #endif  // BENCHMARK_SLEEP_H_
--- a/utils/google-benchmark/src/stat.h
+++ b/utils/google-benchmark/src/stat.h
@@ -0,0 +1,307 @@
 #ifndef BENCHMARK_STAT_H_
 #define BENCHMARK_STAT_H_
 #include <cmath>
 #include <limits>
 #include <ostream>
 #include <type_traits>
 namespace benchmark {
 // Forward declarations of the statistics accumulators defined in this header.
 template <typename VType, typename NumType>
 class Stat1;
 template <typename VType, typename NumType>
 class Stat1MinMax;
 // Convenience aliases for the common float/double value types with 64-bit
 // sample counts.
 typedef Stat1<float, int64_t> Stat1_f;
 typedef Stat1<double, int64_t> Stat1_d;
 typedef Stat1MinMax<float, int64_t> Stat1MinMax_f;
 typedef Stat1MinMax<double, int64_t> Stat1MinMax_d;
 // Vector value types are only declared here; the Sqr/Sqrt overloads below
 // name them so that per-component statistics can be computed.
 template <typename VType>
 class Vector2;
 template <typename VType>
 class Vector3;
 template <typename VType>
 class Vector4;
 // Stat1 accumulates weighted samples and reports their mean and standard
 // deviation without storing the individual values. It keeps three running
 // totals: the total weight, the weighted sum and the weighted sum of squares.
 //
 // VType is the sample value type; NumType is the (integral, non-bool) type
 // used for weights / sample counts.
 template <typename VType, typename NumType>
 class Stat1 {
  public:
   typedef Stat1<VType, NumType> Self;
   // Creates an empty sample set.
   Stat1() { Clear(); }
   // Create a sample of value dat and weight 1
   explicit Stat1(const VType &dat) {
     sum_ = dat;
     sum_squares_ = Sqr(dat);
     numsamples_ = 1;
   }
   // Create statistics for all the samples between begin (included)
   // and end(excluded)
   explicit Stat1(const VType *begin, const VType *end) {
     Clear();
     for (const VType *item = begin; item < end; ++item) {
       (*this) += Stat1(*item);
     }
   }
   // Create a sample of value dat and weight w
   Stat1(const VType &dat, const NumType &w) {
     sum_ = w * dat;
     sum_squares_ = w * Sqr(dat);
     numsamples_ = w;
   }
   // Copy constructor
   Stat1(const Self &stat) {
     sum_ = stat.sum_;
     sum_squares_ = stat.sum_squares_;
     numsamples_ = stat.numsamples_;
   }
   // Resets the accumulator to the empty state (all totals value-initialized).
   void Clear() {
     numsamples_ = NumType();
     sum_squares_ = sum_ = VType();
   }
   // Copy assignment
   Self &operator=(const Self &stat) {
     sum_ = stat.sum_;
     sum_squares_ = stat.sum_squares_;
     numsamples_ = stat.numsamples_;
     return (*this);
   }
   // Merge statistics from two sample sets.
   Self &operator+=(const Self &stat) {
     sum_ += stat.sum_;
     sum_squares_ += stat.sum_squares_;
     numsamples_ += stat.numsamples_;
     return (*this);
   }
   // The operation opposite to +=
   Self &operator-=(const Self &stat) {
     sum_ -= stat.sum_;
     sum_squares_ -= stat.sum_squares_;
     numsamples_ -= stat.numsamples_;
     return (*this);
   }
   // Multiply the weight of the set of samples by a factor k
   Self &operator*=(const VType &k) {
     sum_ *= k;
     sum_squares_ *= k;
     numsamples_ *= k;
     return (*this);
   }
   // Merge statistics from two sample sets.
   Self operator+(const Self &stat) const { return Self(*this) += stat; }
   // The operation opposite to +
   Self operator-(const Self &stat) const { return Self(*this) -= stat; }
   // Multiply the weight of the set of samples by a factor k
   Self operator*(const VType &k) const { return Self(*this) *= k; }
   // Return the total weight of this sample set
   NumType numSamples() const { return numsamples_; }
   // Return the sum of this sample set
   VType Sum() const { return sum_; }
   // Return the mean of this sample set (VType() for an empty set)
   VType Mean() const {
     if (numsamples_ == 0) return VType();
     return sum_ * (1.0 / numsamples_);
   }
   // Return the mean of this sample set and compute the standard deviation at
   // the same time. stddev may be null, in which case only the mean is
   // computed. For an empty set both the returned mean and *stddev are
   // VType(), so the caller never reads an unwritten output.
   VType Mean(VType *stddev) const {
     if (numsamples_ == 0) {
       if (stddev) *stddev = VType();
       return VType();
     }
     VType mean = sum_ * (1.0 / numsamples_);
     if (stddev) {
       VType avg_squares = sum_squares_ * (1.0 / numsamples_);
       *stddev = Sqrt(avg_squares - Sqr(mean));
     }
     return mean;
   }
   // Return the standard deviation of the sample set (VType() for an empty
   // set)
   VType StdDev() const {
     if (numsamples_ == 0) return VType();
     VType mean = Mean();
     VType avg_squares = sum_squares_ * (1.0 / numsamples_);
     return Sqrt(avg_squares - Sqr(mean));
   }
  private:
   static_assert(std::is_integral<NumType>::value &&
                 !std::is_same<NumType, bool>::value,
                 "NumType must be an integral type that is not bool.");
   // Let i be the index of the samples provided (using +=)
   // and weight[i],value[i] be the data of sample #i
   // then the variables have the following meaning:
   NumType numsamples_;  // sum of weight[i];
   VType sum_;           // sum of weight[i]*value[i];
   VType sum_squares_;   // sum of weight[i]*value[i]^2;
   // Template function used to square a number.
   // For a vector we square all components
   template <typename SType>
   static inline SType Sqr(const SType &dat) {
     return dat * dat;
   }
   template <typename SType>
   static inline Vector2<SType> Sqr(const Vector2<SType> &dat) {
     return dat.MulComponents(dat);
   }
   template <typename SType>
   static inline Vector3<SType> Sqr(const Vector3<SType> &dat) {
     return dat.MulComponents(dat);
   }
   template <typename SType>
   static inline Vector4<SType> Sqr(const Vector4<SType> &dat) {
     return dat.MulComponents(dat);
   }
   // Template function used to take the square root of a number.
   // For a vector we take the square root of all components
   template <typename SType>
   static inline SType Sqrt(const SType &dat) {
     // Avoid NaN due to imprecision in the calculations
     if (dat < 0) return 0;
     // std::sqrt selects the overload matching SType; the unqualified C
     // function is only guaranteed to exist for double.
     return std::sqrt(dat);
   }
   template <typename SType>
   static inline Vector2<SType> Sqrt(const Vector2<SType> &dat) {
     // Avoid NaN due to imprecision in the calculations
     return Max(dat, Vector2<SType>()).Sqrt();
   }
   template <typename SType>
   static inline Vector3<SType> Sqrt(const Vector3<SType> &dat) {
     // Avoid NaN due to imprecision in the calculations
     return Max(dat, Vector3<SType>()).Sqrt();
   }
   template <typename SType>
   static inline Vector4<SType> Sqrt(const Vector4<SType> &dat) {
     // Avoid NaN due to imprecision in the calculations
     return Max(dat, Vector4<SType>()).Sqrt();
   }
 };
 // Streams a one-line summary of a Stat1: mean, standard deviation and total
 // sample weight. The accessor is spelled numSamples() on Stat1; calling
 // NumSamples() here would not compile once this template is instantiated.
 template <typename VType, typename NumType>
 std::ostream &operator<<(std::ostream &out, const Stat1<VType, NumType> &s) {
   out << "{ avg = " << s.Mean() << " std = " << s.StdDev()
       << " nsamples = " << s.numSamples() << "}";
   return out;
 }
 // Stat1MinMax: same as Stat1, but it also keeps the Min and Max values.
 // The "-" operators are disabled because removing samples cannot be
 // implemented without keeping the full list of values.
 template <typename VType, typename NumType>
 class Stat1MinMax : public Stat1<VType, NumType> {
  public:
   typedef Stat1MinMax<VType, NumType> Self;
   // Creates an empty sample set.
   Stat1MinMax() { Clear(); }
   // Create a sample of value dat and weight 1
   explicit Stat1MinMax(const VType &dat) : Stat1<VType, NumType>(dat) {
     max_ = dat;
     min_ = dat;
   }
   // Create statistics for all the samples between begin (included)
   // and end(excluded)
   explicit Stat1MinMax(const VType *begin, const VType *end) {
     Clear();
     for (const VType *item = begin; item < end; ++item) {
       (*this) += Stat1MinMax(*item);
     }
   }
   // Create a sample of value dat and weight w
   Stat1MinMax(const VType &dat, const NumType &w)
       : Stat1<VType, NumType>(dat, w) {
     max_ = dat;
     min_ = dat;
   }
   // Copy constructor
   Stat1MinMax(const Self &stat) : Stat1<VType, NumType>(stat) {
     max_ = stat.max_;
     min_ = stat.min_;
   }
   // Resets the accumulator. min_/max_ are seeded with the extreme values of
   // VType so that the first merged sample always replaces them.
   void Clear() {
     Stat1<VType, NumType>::Clear();
     if (std::numeric_limits<VType>::has_infinity) {
       min_ = std::numeric_limits<VType>::infinity();
       max_ = -std::numeric_limits<VType>::infinity();
     } else {
       min_ = std::numeric_limits<VType>::max();
       // lowest() is the most negative finite value for every arithmetic
       // type; min() is only that for integers (for floating-point types it
       // is the smallest positive normal value).
       max_ = std::numeric_limits<VType>::lowest();
     }
   }
   // Copy assignment
   Self &operator=(const Self &stat) {
     this->Stat1<VType, NumType>::operator=(stat);
     max_ = stat.max_;
     min_ = stat.min_;
     return (*this);
   }
   // Merge statistics from two sample sets.
   Self &operator+=(const Self &stat) {
     this->Stat1<VType, NumType>::operator+=(stat);
     if (stat.max_ > max_) max_ = stat.max_;
     if (stat.min_ < min_) min_ = stat.min_;
     return (*this);
   }
   // Multiply the weight of the set of samples by a factor k
   // (min/max are unaffected by re-weighting).
   Self &operator*=(const VType &stat) {
     this->Stat1<VType, NumType>::operator*=(stat);
     return (*this);
   }
   // Merge statistics from two sample sets.
   Self operator+(const Self &stat) const { return Self(*this) += stat; }
   // Multiply the weight of the set of samples by a factor k
   Self operator*(const VType &k) const { return Self(*this) *= k; }
   // Return the maximal value in this sample set
   VType Max() const { return max_; }
   // Return the minimal value in this sample set
   VType Min() const { return min_; }
  private:
   // The - operation makes no sense with Min/Max unless we keep the full list
   // of values (but we don't). The operators are deleted so that any use is
   // rejected at compile time; declaring them here also hides the base-class
   // versions.
   Self &operator-=(const Self &stat) = delete;
   Self operator-(const Self &stat) const = delete;
   // Let i be the index of the samples provided (using +=)
   // and weight[i],value[i] be the data of sample #i
   // then the variables have the following meaning:
   VType max_;  // max of value[i]
   VType min_;  // min of value[i]
 };
 // Streams a one-line summary of a Stat1MinMax: mean, standard deviation,
 // total sample weight, minimum and maximum. The sample-count accessor
 // inherited from Stat1 is spelled numSamples(); calling NumSamples() here
 // would not compile once this template is instantiated.
 template <typename VType, typename NumType>
 std::ostream &operator<<(std::ostream &out,
                          const Stat1MinMax<VType, NumType> &s) {
   out << "{ avg = " << s.Mean() << " std = " << s.StdDev()
       << " nsamples = " << s.numSamples() << " min = " << s.Min()
       << " max = " << s.Max() << "}";
   return out;
 }
 }  // end namespace benchmark
 #endif  // BENCHMARK_STAT_H_
--- a/utils/google-benchmark/src/string_util.cc
+++ b/utils/google-benchmark/src/string_util.cc
@@ -0,0 +1,169 @@
 #include "string_util.h"
 #include <cmath>
 #include <cstdarg>
 #include <array>
 #include <memory>
 #include <sstream>
 #include <stdio.h>
 #include "arraysize.h"
 namespace benchmark {
 namespace {
 // Unit-prefix tables. Entry i of each table is the prefix for the (i+1)-th
 // power of the scaling base; they are indexed by ExponentToPrefix().
 // kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta.
 const char kBigSIUnits[] = "kMGTPEZY";
 // Kibi, Mebi, Gibi, Tebi, Pebi, Exbi, Zebi, Yobi.
 const char kBigIECUnits[] = "KMGTPEZY";
 // milli, micro, nano, pico, femto, atto, zepto, yocto.
 const char kSmallSIUnits[] = "munpfazy";
 // We require that all three arrays have the same size.
 static_assert(arraysize(kBigSIUnits) == arraysize(kBigIECUnits),
               "SI and IEC unit arrays must be the same size");
 static_assert(arraysize(kSmallSIUnits) == arraysize(kBigSIUnits),
               "Small SI and Big SI unit arrays must be the same size");
 // NOTE(review): each table is a char array initialized from a string
 // literal, so it has one more element than it has prefixes (the terminating
 // NUL). Presumably arraysize() reports that element count, making
 // kUnitsSize one larger than the number of real prefixes -- confirm against
 // arraysize.h.
 static const int64_t kUnitsSize = arraysize(kBigSIUnits);
 } // end anonymous namespace
 // Splits `val` into a printable mantissa and a unit exponent.
 //
 //   val        value to render; a negative value yields a "-" prefixed
 //              mantissa
 //   thresh     threshold controlling when the next unit is used (raised to
 //              10^-precision when smaller than that)
 //   precision  number of digits used only to adjust the threshold
 //   one_k      scaling base between adjacent units (e.g. 1000 or 1024)
 //   mantissa   out: the scaled value formatted with operator<<
 //   exponent   out: power of one_k that was divided out (positive for big
 //              values, negative for small values, 0 when unscaled)
 //
 // When no available unit brings the value into range, the value is printed
 // unscaled with exponent 0.
 void ToExponentAndMantissa(double val, double thresh, int precision,
                            double one_k, std::string* mantissa,
                            int64_t* exponent) {
   std::stringstream mantissa_stream;
   if (val < 0) {
     mantissa_stream << "-";
     val = -val;
   }
   // Adjust threshold so that it never excludes things which can't be rendered
   // in 'precision' digits.
   const double adjusted_threshold =
       std::max(thresh, 1.0 / std::pow(10.0, precision));
   const double big_threshold = adjusted_threshold * one_k;
   const double small_threshold = adjusted_threshold;
   // The unit tables are char arrays built from string literals, so their
   // element count includes the terminating NUL. Only the entries before it
   // are real prefixes; stepping onto the NUL would produce an exponent that
   // has no unit.
   const size_t num_big_units = arraysize(kBigSIUnits) - 1;
   const size_t num_small_units = arraysize(kSmallSIUnits) - 1;
   if (val > big_threshold) {
     // Positive powers
     double scaled = val;
     for (size_t i = 0; i < num_big_units; ++i) {
       scaled /= one_k;
       if (scaled <= big_threshold) {
         mantissa_stream << scaled;
         *exponent = i + 1;
         *mantissa = mantissa_stream.str();
         return;
       }
     }
     // Too large for any unit: print unscaled.
     mantissa_stream << val;
     *exponent = 0;
   } else if (val < small_threshold) {
     // Negative powers
     double scaled = val;
     for (size_t i = 0; i < num_small_units; ++i) {
       scaled *= one_k;
       if (scaled >= small_threshold) {
         mantissa_stream << scaled;
         *exponent = -static_cast<int64_t>(i + 1);
         *mantissa = mantissa_stream.str();
         return;
       }
     }
     // Too small for any unit (this includes zero): print unscaled.
     mantissa_stream << val;
     *exponent = 0;
   } else {
     mantissa_stream << val;
     *exponent = 0;
   }
   *mantissa = mantissa_stream.str();
 }
 // Maps a unit exponent to its prefix string.
 //
 //   exponent  0 yields ""; positive values select a "big" prefix and
 //             negative values a "small" SI prefix (|exponent| - 1 is the
 //             table index)
 //   iec       when true, big prefixes come from the IEC table and an "i" is
 //             appended to the selected character
 //
 // Returns "" when the exponent has no corresponding prefix.
 std::string ExponentToPrefix(int64_t exponent, bool iec) {
   if (exponent == 0) return "";
   const int64_t index = (exponent > 0 ? exponent - 1 : -exponent - 1);
   if (index >= kUnitsSize) return "";
   const char* array =
       (exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) : kSmallSIUnits);
   // The tables are NUL-terminated string literals; an in-bounds index may
   // still land on the terminator, which is not a prefix.
   if (array[index] == '\0') return "";
   if (iec)
     return array[index] + std::string("i");
   else
     return std::string(1, array[index]);
 }
 // Formats `value` as a mantissa followed by a unit prefix, scaling by
 // powers of 1024. The prefix is looked up with iec == false, i.e. without
 // the IEC "i" suffix.
 std::string ToBinaryStringFullySpecified(double value, double threshold,
                                          int precision) {
   int64_t power;
   std::string digits;
   ToExponentAndMantissa(value, threshold, precision, 1024.0, &digits,
                         &power);
   const std::string prefix = ExponentToPrefix(power, false);
   return digits + prefix;
 }
 // Appends "/" followed by the human-readable form of `n` (threshold 1.0,
 // precision 0) to *str.
 void AppendHumanReadable(int n, std::string* str) {
   // Build the whole suffix first, then append it in one step.
   const std::string suffix = "/" + ToBinaryStringFullySpecified(n, 1.0, 0);
   *str += suffix;
 }
 // Returns `n` rendered with a unit prefix.
 std::string HumanReadableNumber(double n) {
   // A threshold of 1.1 means that figures up to 1.1k are shown with the next
   // unit down; this softens edge effects.
   const double threshold = 1.1;
   // One decimal place of precision.
   const int precision = 1;
   return ToBinaryStringFullySpecified(n, threshold, precision);
 }
 // printf-style formatting into a std::string.
 //
 //   msg   printf format string
 //   args  argument list matching msg; the caller keeps ownership and is
 //         responsible for va_end(args)
 //
 // Returns the formatted text, or an empty string when the expansion is
 // empty or vsnprintf reports an error.
 std::string StringPrintFImp(const char *msg, va_list args)
 {
   // we might need a second shot at this, so pre-emptively make a copy
   va_list args_cp;
   va_copy(args_cp, args);
   // First attempt: format into a fixed-size stack buffer to avoid a heap
   // allocation for short results.
   std::array<char, 256> local_buff;
   std::size_t size = local_buff.size();
   // 2015-10-08: vsnprintf is used instead of std::vsnprintf due to a
   // limitation in the android-ndk
   auto ret = vsnprintf(local_buff.data(), size, msg, args_cp);
   va_end(args_cp);
   // handle empty expansion, and a negative return (output/encoding error):
   // a negative value must not be cast to size_t and used as a length.
   if (ret <= 0)
     return std::string{};
   if (static_cast<std::size_t>(ret) < size)
     return std::string(local_buff.data());
   // we did not provide a long enough buffer on our first attempt.
   // add 1 to size to account for null-byte in size cast to prevent overflow
   size = static_cast<std::size_t>(ret) + 1;
   auto buff_ptr = std::unique_ptr<char[]>(new char[size]);
   // 2015-10-08: vsnprintf is used instead of std::vsnprintf due to a
   // limitation in the android-ndk
   ret = vsnprintf(buff_ptr.get(), size, msg, args);
   // On failure the buffer contents are unspecified, so do not read them.
   if (ret < 0)
     return std::string{};
   return std::string(buff_ptr.get());
 }
 // Variadic front end for StringPrintFImp: collects the arguments into a
 // va_list, formats them, and returns the resulting string.
 std::string StringPrintF(const char* format, ...)
 {
   va_list arg_list;
   va_start(arg_list, format);
   const std::string formatted = StringPrintFImp(format, arg_list);
   va_end(arg_list);
   return formatted;
 }
 // Replaces every non-overlapping occurrence of `from` in *str with `to`,
 // scanning left to right. Text inserted by a replacement is not rescanned.
 // An empty `from` matches nothing and leaves *str unchanged.
 void ReplaceAll(std::string* str, const std::string& from,
                 const std::string& to) {
   // find("") succeeds at every position, so without this guard the loop
   // would never terminate (and would grow *str without bound when `to` is
   // non-empty).
   if (from.empty()) return;
   std::size_t start = 0;
   while((start = str->find(from, start)) != std::string::npos) {
     str->replace(start, from.length(), to);
     // Skip past the inserted text so it is not matched again.
     start += to.length();
   }
 }
 } // end namespace benchmark
--- a/utils/google-benchmark/src/string_util.h
+++ b/utils/google-benchmark/src/string_util.h
@@ -0,0 +1,44 @@
 #ifndef BENCHMARK_STRING_UTIL_H_
 #define BENCHMARK_STRING_UTIL_H_
 #include <string>
 #include <sstream>
 #include <utility>
 #include "internal_macros.h"
 namespace benchmark {
 // Appends "/" followed by a human-readable rendering of `n` to *str.
 void AppendHumanReadable(int n, std::string* str);
 // Returns `n` rendered as a mantissa followed by a unit prefix.
 std::string HumanReadableNumber(double n);
 // printf-style formatting that returns the result as a std::string.
 std::string StringPrintF(const char* format, ...);
 // Base case of the StringCatImp recursion: nothing left to stream, so the
 // stream is returned unchanged.
 inline std::ostream&
 StringCatImp(std::ostream& out) BENCHMARK_NOEXCEPT
 {
   return out;
 }
 // Recursive case: streams the first argument with operator<<, then recurses
 // on the remaining arguments, so they are written in left-to-right order.
 template <class First, class ...Rest>
 inline std::ostream&
 StringCatImp(std::ostream& out, First&& f, Rest&&... rest)
 {
   out << std::forward<First>(f);
   return StringCatImp(out, std::forward<Rest>(rest)...);
 }
 // Concatenates the operator<< renderings of all arguments, in order, and
 // returns the result as a std::string.
 template<class ...Args>
 inline std::string StrCat(Args&&... args)
 {
   std::ostringstream joined;
   StringCatImp(joined, std::forward<Args>(args)...);
   return joined.str();
 }
 // Replaces, in place, each occurrence of `from` in *str with `to`, scanning
 // left to right and continuing after the inserted text.
 void ReplaceAll(std::string* str, const std::string& from,
                 const std::string& to);
 } // end namespace benchmark
 #endif // BENCHMARK_STRING_UTIL_H_
--- a/utils/google-benchmark/src/sysinfo.cc
+++ b/utils/google-benchmark/src/sysinfo.cc
@@ -0,0 +1,420 @@
 // Copyright 2015 Google Inc. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "sysinfo.h"
 #include "internal_macros.h"
 #ifdef BENCHMARK_OS_WINDOWS
 #include <Shlwapi.h>
 #include <Windows.h>
 #include <VersionHelpers.h>
 #else
 #include <fcntl.h>
 #include <sys/resource.h>
 #include <sys/types.h> // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD
 #include <sys/time.h>
 #include <unistd.h>
 #if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX
 #include <sys/sysctl.h>
 #endif
 #endif
 #include <cerrno>
 #include <cstdio>
 #include <cstdint>
 #include <cstdlib>
 #include <cstring>
 #include <iostream>
 #include <limits>
 #include <mutex>
 #include "arraysize.h"
 #include "check.h"
 #include "cycleclock.h"
 #include "internal_macros.h"
 #include "log.h"
 #include "sleep.h"
 #include "string_util.h"
 namespace benchmark {
 namespace {
 // Guards the one-time call to InitializeSystemInfo() made from
 // CyclesPerSecond() and NumCPUs().
 std::once_flag cpuinfo_init;
 // Values filled in by InitializeSystemInfo(); the initializers are the
 // fallbacks used when detection does not overwrite them.
 double cpuinfo_cycles_per_second = 1.0;
 int cpuinfo_num_cpus = 1;  // Conservative guess
 // Held by MyCPUUsage() while it reads /proc/self/cputime_ns.
 std::mutex cputimens_mutex;
 #if !defined BENCHMARK_OS_MACOSX
 // Length of the measurement window used by EstimateCyclesPerSecond().
 const int64_t estimate_time_ms = 1000;
 // Estimates cycles per second by counting how many cycle-clock ticks elapse
 // across a sleep of estimate_time_ms milliseconds. A shorter sleep would
 // make the estimate noticeably less accurate.
 int64_t EstimateCyclesPerSecond() {
   const int64_t ticks_before = cycleclock::Now();
   SleepForMilliseconds(estimate_time_ms);
   const auto ticks_after = cycleclock::Now();
   return ticks_after - ticks_before;
 }
 #endif
 #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
 // Helper function for reading an int from a file.
 //
 //   file   path of the file to read
 //   value  out: receives the parsed number; left untouched on failure
 //
 // Returns true only when the file could be opened and read, and its content
 // is a base-10 integer followed by either a newline or the end of the data.
 // Unreadable, empty or non-numeric files yield false instead of aborting.
 bool ReadIntFromFile(const char* file, long* value) {
   const int fd = open(file, O_RDONLY);
   if (fd == -1) return false;
   char line[1024];
   memset(line, '\0', sizeof(line));
   // Leave room for the terminating NUL provided by the memset above.
   const ssize_t bytes_read = read(fd, line, sizeof(line) - 1);
   close(fd);
   // read() returns -1 on error and 0 at end of file; neither gives us
   // anything to parse.
   if (bytes_read <= 0) return false;
   char* err;
   const long temp_value = strtol(line, &err, 10);
   // strtol leaves err == line when it consumed no digits at all.
   if (err == line) return false;
   if (*err != '\n' && *err != '\0') return false;
   *value = temp_value;
   return true;
 }
 #endif
 // Detects the CPU frequency and, where supported, the CPU count, and stores
 // them in cpuinfo_cycles_per_second / cpuinfo_num_cpus. The strategy is
 // selected per platform at compile time:
 //   Linux/Cygwin: sysfs frequency files, then /proc/cpuinfo, then a timed
 //                 estimate
 //   FreeBSD:      the machdep.tsc_freq sysctl, else a timed estimate
 //   Windows:      the "~MHz" registry value, else a timed estimate
 //   Mac OS X:     the mach timebase, plus the hw.ncpu sysctl
 //   otherwise:    a timed estimate
 // Intended to run once, via std::call_once on cpuinfo_init.
 void InitializeSystemInfo() {
 #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
   char line[1024];
   char* err;
   long freq;
   bool saw_mhz = false;
   // If the kernel is exporting the tsc frequency use that. There are issues
   // where cpuinfo_max_freq cannot be relied on because the BIOS may be
   // exporting an invalid p-state (on x86) or p-states may be used to put the
   // processor in a new mode (turbo mode). Essentially, those frequencies
   // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as
   // well.
   if (!saw_mhz &&
       ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
     // The value is in kHz (as the file name suggests).  For example, on a
     // 2GHz warpstation, the file contains the value "2000000".
     cpuinfo_cycles_per_second = freq * 1000.0;
     saw_mhz = true;
   }
   // If CPU scaling is in effect, we want to use the *maximum* frequency,
   // not whatever CPU speed some random processor happens to be using now.
   if (!saw_mhz &&
       ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
                       &freq)) {
     // The value is in kHz.  For example, on a 2GHz warpstation, the file
     // contains the value "2000000".
     cpuinfo_cycles_per_second = freq * 1000.0;
     saw_mhz = true;
   }
   // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq.
   const char* pname = "/proc/cpuinfo";
   int fd = open(pname, O_RDONLY);
   if (fd == -1) {
     perror(pname);
     if (!saw_mhz) {
       cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
     }
     return;
   }
   double bogo_clock = 1.0;
   bool saw_bogo = false;
   long max_cpu_id = 0;
   int num_cpus = 0;
   line[0] = line[1] = '\0';
   size_t chars_read = 0;
   do {  // we'll exit when the last read didn't read anything
     // Move the next line to the beginning of the buffer
     const size_t oldlinelen = strlen(line);
     if (sizeof(line) == oldlinelen + 1)  // oldlinelen took up entire line
       line[0] = '\0';
     else  // still other lines left to save
       memmove(line, line + oldlinelen + 1, sizeof(line) - (oldlinelen + 1));
     // Terminate the new line, reading more if we can't find the newline
     char* newline = strchr(line, '\n');
     if (newline == nullptr) {
       const size_t linelen = strlen(line);
       const size_t bytes_to_read = sizeof(line) - 1 - linelen;
       CHECK(bytes_to_read > 0);  // because the memmove recovered >=1 bytes
       // read() returns -1 on error. Storing that in a size_t would turn it
       // into a huge offset and the terminator below would be written far
       // outside the buffer, so a failed read is treated like end-of-file.
       const ssize_t read_result = read(fd, line + linelen, bytes_to_read);
       chars_read = (read_result > 0) ? static_cast<size_t>(read_result) : 0;
       line[linelen + chars_read] = '\0';
       newline = strchr(line, '\n');
     }
     if (newline != nullptr) *newline = '\0';
     // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
     // accept positive values. Some environments (virtual machines) report
     // zero, which would cause infinite looping in WallTime_Init.
     if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz") - 1) == 0) {
       const char* freqstr = strchr(line, ':');
       if (freqstr) {
         cpuinfo_cycles_per_second = strtod(freqstr + 1, &err) * 1000000.0;
         if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0)
           saw_mhz = true;
       }
     } else if (strncasecmp(line, "bogomips", sizeof("bogomips") - 1) == 0) {
       const char* freqstr = strchr(line, ':');
       if (freqstr) {
         bogo_clock = strtod(freqstr + 1, &err) * 1000000.0;
         if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0)
           saw_bogo = true;
       }
     } else if (strncmp(line, "processor", sizeof("processor") - 1) == 0) {
       // The above comparison is case-sensitive because ARM kernels often
       // include a "Processor" line that tells you about the CPU, distinct
       // from the usual "processor" lines that give you CPU ids. No current
       // Linux architecture is using "Processor" for CPU ids.
       num_cpus++;  // count up every time we see an "processor :" entry
       const char* id_str = strchr(line, ':');
       if (id_str) {
         const long cpu_id = strtol(id_str + 1, &err, 10);
         if (id_str[1] != '\0' && *err == '\0' && max_cpu_id < cpu_id)
           max_cpu_id = cpu_id;
       }
     }
   } while (chars_read > 0);
   close(fd);
   if (!saw_mhz) {
     if (saw_bogo) {
       // If we didn't find anything better, we'll use bogomips, but
       // we're not happy about it.
       cpuinfo_cycles_per_second = bogo_clock;
     } else {
       // If we don't even have bogomips, we'll use the slow estimation.
       cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
     }
   }
   if (num_cpus == 0) {
     fprintf(stderr, "Failed to read num. CPUs correctly from /proc/cpuinfo\n");
   } else {
     if ((max_cpu_id + 1) != num_cpus) {
       fprintf(stderr,
               "CPU ID assignments in /proc/cpuinfo seem messed up."
               " This is usually caused by a bad BIOS.\n");
     }
     cpuinfo_num_cpus = num_cpus;
   }
 #elif defined BENCHMARK_OS_FREEBSD
 // For this sysctl to work, the machine must be configured without
 // SMP, APIC, or APM support.  hz should be 64-bit in freebsd 7.0
 // and later.  Before that, it's a 32-bit quantity (and gives the
 // wrong answer on machines faster than 2^32 Hz).  See
 //  http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html
 // But also compare FreeBSD 7.0:
 //  http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223
 //  231         error = sysctl_handle_quad(oidp, &freq, 0, req);
 // To FreeBSD 6.3 (it's the same in 6-STABLE):
 //  http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131
 //  139         error = sysctl_handle_int(oidp, &freq, sizeof(freq), req);
 #if __FreeBSD__ >= 7
   uint64_t hz = 0;
 #else
   unsigned int hz = 0;
 #endif
   size_t sz = sizeof(hz);
   const char* sysctl_path = "machdep.tsc_freq";
   if (sysctlbyname(sysctl_path, &hz, &sz, nullptr, 0) != 0) {
     fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
             sysctl_path, strerror(errno));
     cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
   } else {
     cpuinfo_cycles_per_second = hz;
   }
 // TODO: also figure out cpuinfo_num_cpus
 #elif defined BENCHMARK_OS_WINDOWS
   // In NT, read MHz from the registry. If we fail to do so or we're in win9x
   // then make a crude estimate.
   DWORD data, data_size = sizeof(data);
   if (IsWindowsXPOrGreater() &&
       SUCCEEDED(
           SHGetValueA(HKEY_LOCAL_MACHINE,
                       "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
                       "~MHz", nullptr, &data, &data_size)))
     cpuinfo_cycles_per_second = static_cast<double>((int64_t)data * (int64_t)(1000 * 1000));  // was mhz
   else
     cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
 // TODO: also figure out cpuinfo_num_cpus
 #elif defined BENCHMARK_OS_MACOSX
   // returning "mach time units" per second. the current number of elapsed
   // mach time units can be found by calling uint64 mach_absolute_time();
   // while not as precise as actual CPU cycles, it is accurate in the face
   // of CPU frequency scaling and multi-cpu/core machines.
   // Our mac users have these types of machines, and accuracy
   // (i.e. correctness) trumps precision.
   // See cycleclock.h: CycleClock::Now(), which returns number of mach time
   // units on Mac OS X.
   mach_timebase_info_data_t timebase_info;
   mach_timebase_info(&timebase_info);
   double mach_time_units_per_nanosecond =
       static_cast<double>(timebase_info.denom) /
       static_cast<double>(timebase_info.numer);
   cpuinfo_cycles_per_second = mach_time_units_per_nanosecond * 1e9;
   int num_cpus = 0;
   size_t size = sizeof(num_cpus);
   int numcpus_name[] = {CTL_HW, HW_NCPU};
   if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, nullptr, 0) ==
           0 &&
       (size == sizeof(num_cpus)))
     cpuinfo_num_cpus = num_cpus;
 #else
   // Generic cycles per second counter
   cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
 #endif
 }
 }  // end namespace
 // Returns the CPU time consumed by this process, in seconds (user time plus
 // system/kernel time). POSIX builds query getrusage(RUSAGE_SELF) and return
 // 0.0 when that call fails; Windows builds query GetProcessTimes().
 static double MyCPUUsageRUsage() {
 #ifndef BENCHMARK_OS_WINDOWS
   struct rusage usage;
   if (getrusage(RUSAGE_SELF, &usage) != 0) {
     return 0.0;
   }
   // Seconds and microseconds are combined field by field, in this order.
   return (static_cast<double>(usage.ru_utime.tv_sec) +
           static_cast<double>(usage.ru_utime.tv_usec) * 1e-6 +
           static_cast<double>(usage.ru_stime.tv_sec) +
           static_cast<double>(usage.ru_stime.tv_usec) * 1e-6);
 #else
   FILETIME creation_time;
   FILETIME exit_time;
   FILETIME kernel_time;
   FILETIME user_time;
   GetProcessTimes(GetCurrentProcess(), &creation_time, &exit_time,
                   &kernel_time, &user_time);
   // Reassemble each 64-bit tick count from its two 32-bit halves.
   ULARGE_INTEGER kernel_ticks;
   kernel_ticks.HighPart = kernel_time.dwHighDateTime;
   kernel_ticks.LowPart = kernel_time.dwLowDateTime;
   ULARGE_INTEGER user_ticks;
   user_ticks.HighPart = user_time.dwHighDateTime;
   user_ticks.LowPart = user_time.dwLowDateTime;
   // The combined tick count is scaled by 1e-7 to give seconds.
   return (static_cast<double>(kernel_ticks.QuadPart) +
           static_cast<double>(user_ticks.QuadPart)) * 1e-7;
 #endif  // OS_WINDOWS
 }
 #ifndef BENCHMARK_OS_WINDOWS
 // Reads the process CPU time from /proc/self/cputime_ns and stores it, in
 // seconds, in *cputime. The file descriptor is cached in a function-local
 // static across calls; on any failure it is closed and reset, and false is
 // returned with *cputime untouched. The "Locked" suffix reflects that the
 // caller in this file holds cputimens_mutex around the call.
 static bool MyCPUUsageCPUTimeNsLocked(double* cputime) {
   static int cputime_fd = -1;
   if (cputime_fd == -1) {
     cputime_fd = open("/proc/self/cputime_ns", O_RDONLY);
     if (cputime_fd < 0) {
       cputime_fd = -1;
       return false;
     }
   }
   char text[64];
   memset(text, 0, sizeof(text));
   unsigned long long nanos = 0;
   // Always re-read from offset 0; keep one byte for the terminating NUL.
   bool ok = pread(cputime_fd, text, sizeof(text) - 1, 0) > 0;
   if (ok) {
     nanos = strtoull(text, nullptr, 0);
     // The maximum value is treated as a failed conversion.
     ok = (nanos != (std::numeric_limits<unsigned long long>::max)());
   }
   if (!ok) {
     close(cputime_fd);
     cputime_fd = -1;
     return false;
   }
   *cputime = static_cast<double>(nanos) / 1e9;
   return true;
 }
 #endif  // OS_WINDOWS
 // Returns the CPU time used by this process, in seconds. On non-Windows
 // builds /proc/self/cputime_ns is tried first; after its first failure that
 // source is disabled for the rest of the process and every call uses the
 // getrusage()-based implementation instead.
 double MyCPUUsage() {
 #ifndef BENCHMARK_OS_WINDOWS
   {
     std::lock_guard<std::mutex> guard(cputimens_mutex);
     static bool try_cputime_ns = true;
     if (try_cputime_ns) {
       double cpu_seconds;
       const bool read_ok = MyCPUUsageCPUTimeNsLocked(&cpu_seconds);
       if (read_ok) {
         return cpu_seconds;
       }
       // Once MyCPUUsageCPUTimeNsLocked fails once fall back to getrusage().
       VLOG(1) << "Reading /proc/self/cputime_ns failed. Using getrusage().\n";
       try_cputime_ns = false;
     }
   }
 #endif  // OS_WINDOWS
   return MyCPUUsageRUsage();
 }
 // Returns the CPU time (user plus system), in seconds, reported by
 // getrusage(RUSAGE_CHILDREN). Returns 0.0 when getrusage() fails, and always
 // 0.0 on Windows.
 double ChildrenCPUUsage() {
 #ifndef BENCHMARK_OS_WINDOWS
   struct rusage usage;
   if (getrusage(RUSAGE_CHILDREN, &usage) != 0) {
     return 0.0;
   }
   return (static_cast<double>(usage.ru_utime.tv_sec) +
           static_cast<double>(usage.ru_utime.tv_usec) * 1e-6 +
           static_cast<double>(usage.ru_stime.tv_sec) +
           static_cast<double>(usage.ru_stime.tv_usec) * 1e-6);
 #else
   // TODO: Not sure what this even means on Windows
   return 0.0;
 #endif  // OS_WINDOWS
 }
 double CyclesPerSecond(void) {
  std::call_once(cpuinfo_init, InitializeSystemInfo);
  return cpuinfo_cycles_per_second;
 }
 // Returns the detected CPU count, running the one-time system detection
 // first if it has not happened yet.
 int NumCPUs() {
   std::call_once(cpuinfo_init, InitializeSystemInfo);
   const int cpu_count = cpuinfo_num_cpus;
   return cpu_count;
 }
 // strliterallen(str): length of a string literal, excluding its terminating
 // NUL, computed with sizeof.
 // The ""'s catch people who don't pass in a literal for "str"
 #define strliterallen(str) (sizeof("" str "") - 1)
 // memprefix(str, len, prefix): if the first `len` bytes of `str` are long
 // enough and begin with the literal `prefix`, evaluates to a pointer just
 // past the prefix inside `str`; otherwise evaluates to nullptr.
 // NOTE(review): `str` is expanded without parentheses and more than once, so
 // callers should pass a simple, side-effect-free expression.
 // Must use a string literal for prefix.
 #define memprefix(str, len, prefix)                       \
   ((((len) >= strliterallen(prefix)) &&                   \
     std::memcmp(str, prefix, strliterallen(prefix)) == 0) \
        ? str + strliterallen(prefix)                      \
        : nullptr)
 // Reports whether any CPU appears to have frequency scaling active: returns
 // true as soon as one CPU's scaling_governor file does not start with
 // "performance". Returns false when every governor file that could be read
 // says "performance", when the first unreadable file is reached, and always
 // on Windows.
 bool CpuScalingEnabled() {
 #ifndef BENCHMARK_OS_WINDOWS
   // On Linux, the CPUfreq subsystem exposes CPU information as files on the
   // local file system. If reading the exported files fails, then we may not be
   // running on Linux, so we silently ignore all the read errors.
   const int cpu_count = NumCPUs();
   int cpu = 0;
   while (cpu < cpu_count) {
     const std::string governor_path = StrCat("/sys/devices/system/cpu/cpu",
                                              cpu,
                                              "/cpufreq/scaling_governor");
     FILE* governor = fopen(governor_path.c_str(), "r");
     if (governor == nullptr) break;
     char contents[16];
     const size_t contents_len = fread(contents, 1, sizeof(contents), governor);
     fclose(governor);
     const bool is_performance =
         memprefix(contents, contents_len, "performance") != nullptr;
     if (!is_performance) return true;
     ++cpu;
   }
 #endif
   return false;
 }
 }  // end namespace benchmark
--- a/utils/google-benchmark/src/sysinfo.h
+++ b/utils/google-benchmark/src/sysinfo.h
@@ -0,0 +1,12 @@
 #ifndef BENCHMARK_SYSINFO_H_
 #define BENCHMARK_SYSINFO_H_
 // System-information queries used by the benchmark library.
 namespace benchmark {
 // NOTE(review): presumably the CPU time consumed by the calling process; the
 // definition is not visible from this header -- confirm in sysinfo.cc.
 double MyCPUUsage();
 // CPU time (user + system), in seconds, reported by
 // getrusage(RUSAGE_CHILDREN); 0.0 on failure and on Windows.
 double ChildrenCPUUsage();
 // Cached CPU count; system information is initialised once on first use.
 int NumCPUs();
 // Cached CPU cycle frequency; system information is initialised once on first
 // use.
 double CyclesPerSecond();
 // True when some CPU's cpufreq scaling governor is not "performance". Always
 // false on Windows.
 bool CpuScalingEnabled();
 }  // end namespace benchmark
 #endif  // BENCHMARK_SYSINFO_H_
--- a/utils/google-benchmark/src/walltime.cc
+++ b/utils/google-benchmark/src/walltime.cc
@@ -0,0 +1,263 @@
 // Copyright 2015 Google Inc. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "benchmark/macros.h"
 #include "internal_macros.h"
 #include "walltime.h"
 #if defined(BENCHMARK_OS_WINDOWS)
 #include <time.h>
 #include <winsock.h> // for timeval
 #else
 #include <sys/time.h>
 #endif
 #include <cstdio>
 #include <cstdint>
 #include <cstring>
 #include <ctime>
 #include <atomic>
 #include <chrono>
 #include <limits>
 #include "arraysize.h"
 #include "check.h"
 #include "cycleclock.h"
 #include "log.h"
 #include "sysinfo.h"
 namespace benchmark {
 namespace walltime {
 namespace {
 #if defined(HAVE_STEADY_CLOCK)
 // Selects a steady clock at compile time: std::chrono::steady_clock in
 // general, std::chrono::high_resolution_clock when that clock is itself
 // steady.
 template <bool IsHighResSteady = std::chrono::high_resolution_clock::is_steady>
 struct ChooseSteadyClock {
    typedef std::chrono::steady_clock type;
 };
 template <>
 struct ChooseSteadyClock<true> {
    typedef std::chrono::high_resolution_clock type;
 };
 #endif
 // ChooseClockType::type is the std::chrono clock used by ChronoWalltimeNow().
 struct ChooseClockType {
 #if !defined(HAVE_STEADY_CLOCK)
  // No steady clock was detected at configure time; fall back unconditionally.
  typedef std::chrono::high_resolution_clock type;
 #else
  typedef ChooseSteadyClock<>::type type;
 #endif
 };
 // Wall-clock source that extrapolates from the CPU cycle counter.
 //
 // A (walltime, cycle count) base pair is captured at construction; Now()
 // converts elapsed cycles to seconds and adds a drift correction that is
 // refreshed from the slow system clock whenever the upper 32 bits of the
 // cycle counter change.
 class WallTimeImp
 {
 public:
  // Returns the current wall time in seconds (see the definition below).
  WallTime Now();
  // Returns the process-wide instance. It is heap-allocated on first use and
  // never deleted.
  static WallTimeImp& GetWallTimeImp() {
    static WallTimeImp* imp = new WallTimeImp();
    return *imp;
  }
 private:
  WallTimeImp();
  // Helper routines to load/store a float from an AtomicWord. Required because
  // g++ < 4.7 doesn't support std::atomic<float> correctly. I cannot wait to
  // get rid of this horror show.
  // SetDrift copies the bytes of `f` into an int32_t and stores that value
  // atomically in drift_adjust_.
  void SetDrift(float f) {
    int32_t w;
    memcpy(&w, &f, sizeof(f));
    std::atomic_store(&drift_adjust_, w);
  }
  // GetDrift atomically loads drift_adjust_ and copies its bytes back into a
  // float (the inverse of SetDrift).
  float GetDrift() const {
    float f;
    int32_t w = std::atomic_load(&drift_adjust_);
    memcpy(&f, &w, sizeof(f));
    return f;
  }
  // Reads the system clock directly and returns seconds (with a microsecond
  // fraction) as a WallTime. Uses gettimeofday(), or on Windows
  // GetSystemTime() converted through FILETIME.
  WallTime Slow() const {
    struct timeval tv;
 #if defined(BENCHMARK_OS_WINDOWS)
    FILETIME    file_time;
    SYSTEMTIME  system_time;
    ULARGE_INTEGER ularge;
    // Subtracted from the FILETIME value before it is divided down to
    // seconds.
    // NOTE(review): presumably the 1601 -> 1970 epoch offset in 100 ns
    // units -- confirm.
    const unsigned __int64 epoch = 116444736000000000LL;
    GetSystemTime(&system_time);
    SystemTimeToFileTime(&system_time, &file_time);
    ularge.LowPart = file_time.dwLowDateTime;
    ularge.HighPart = file_time.dwHighDateTime;
    tv.tv_sec = (long)((ularge.QuadPart - epoch) / (10L * 1000 * 1000));
    tv.tv_usec = (long)(system_time.wMilliseconds * 1000);
 #else
    gettimeofday(&tv, nullptr);
 #endif
    return tv.tv_sec + tv.tv_usec * 1e-6;
  }
 private:
  // The drift float is stored inside an int32_t, so it must fit.
  static_assert(sizeof(float) <= sizeof(int32_t),
               "type sizes don't allow the drift_adjust hack");
  // Slow() reading taken in the constructor.
  WallTime base_walltime_;
  // cycleclock::Now() reading paired with base_walltime_.
  int64_t base_cycletime_;
  // CyclesPerSecond() truncated to an integer, and its reciprocal.
  int64_t cycles_per_second_;
  double seconds_per_cycle_;
  // Upper 32 bits of the cycle counter at the last drift update.
  uint32_t last_adjust_time_;
  // Bit pattern of the float drift correction (see SetDrift/GetDrift).
  std::atomic<int32_t> drift_adjust_;
  // Largest cycle gap accepted between paired cycle/wall clock readings.
  int64_t max_interval_cycles_;
  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(WallTimeImp);
 };
 // Returns the current wall time in seconds.
 //
 // Fast path: extrapolate from the base pair using the cycle counter and add
 // the stored drift. Slow path (taken when the upper 32 bits of the cycle
 // counter differ from last_adjust_time_): re-read the system clock, store the
 // new drift, and return the system clock reading.
 WallTime WallTimeImp::Now() {
  WallTime now = 0.0;
  WallTime result = 0.0;
  int64_t ct = 0;
  uint32_t top_bits = 0;
  do {
    ct = cycleclock::Now();
    int64_t cycle_delta = ct - base_cycletime_;
    // Extrapolated time, before drift correction.
    result = base_walltime_ + cycle_delta * seconds_per_cycle_;
    top_bits = static_cast<uint32_t>(uint64_t(ct) >> 32);
    // Recompute drift no more often than every 2^32 cycles.
    // I.e., @2GHz, ~ every two seconds
    if (top_bits == last_adjust_time_) {  // don't need to recompute drift
      return result + GetDrift();
    }
    now = Slow();
    // Retry if more than max_interval_cycles_ elapsed between the two cycle
    // counter reads that bracket the Slow() call.
  } while (cycleclock::Now() - ct > max_interval_cycles_);
  // We are now sure that "now" and "result" were produced within
  // kMaxErrorInterval of one another.
  SetDrift(static_cast<float>(now - result));
  last_adjust_time_ = top_bits;
  return now;
 }
 // Captures the base (walltime, cycle count) pair and derives the cycle/second
 // conversion factors. CHECK-fails if the reported cycle frequency truncates
 // to zero.
 WallTimeImp::WallTimeImp()
    : base_walltime_(0.0), base_cycletime_(0),
      cycles_per_second_(0), seconds_per_cycle_(0.0),
      last_adjust_time_(0), drift_adjust_(0),
      max_interval_cycles_(0) {
  // Maximum tolerated gap, in seconds, between paired clock readings.
  const double kMaxErrorInterval = 100e-6;
  cycles_per_second_ = static_cast<int64_t>(CyclesPerSecond());
  CHECK(cycles_per_second_ != 0);
  seconds_per_cycle_ = 1.0 / cycles_per_second_;
  max_interval_cycles_ =
      static_cast<int64_t>(cycles_per_second_ * kMaxErrorInterval);
  // Re-sample until the cycle counter advanced by no more than
  // max_interval_cycles_ across the Slow() call.
  do {
    base_cycletime_ = cycleclock::Now();
    base_walltime_ = Slow();
  } while (cycleclock::Now() - base_cycletime_ > max_interval_cycles_);
  // We are now sure that "base_walltime" and "base_cycletime" were produced
  // within kMaxErrorInterval of one another.
  SetDrift(0.0);
  last_adjust_time_ = static_cast<uint32_t>(uint64_t(base_cycletime_) >> 32);
 }
 // Returns the wall time from the shared cycle-counter based WallTimeImp
 // instance. The reference to that instance is obtained once, on first call.
 WallTime CPUWalltimeNow() {
  static WallTimeImp& imp = WallTimeImp::GetWallTimeImp();
  return imp.Now();
 }
 // Returns the time since the epoch of ChooseClockType::type, expressed as
 // floating-point seconds.
 WallTime ChronoWalltimeNow() {
  typedef ChooseClockType::type Clock;
  // Duration type counting seconds with WallTime as its representation.
  typedef std::chrono::duration<WallTime, std::chrono::seconds::period>
          FPSeconds;
  static_assert(std::chrono::treat_as_floating_point<WallTime>::value,
                "This type must be treated as a floating point type.");
  auto now = Clock::now().time_since_epoch();
  return std::chrono::duration_cast<FPSeconds>(now).count();
 }
 // Decides which time source walltime::Now() uses and logs the choice at
 // verbosity level 1. Returns true (use the CPU cycle clock) exactly when
 // CpuScalingEnabled() reports false.
 bool UseCpuCycleClock() {
    const bool use_cycle_clock = !CpuScalingEnabled();
    VLOG(1) << (use_cycle_clock
                    ? "Using the CPU cycle clock to provide walltime::Now().\n"
                    : "Using std::chrono to provide walltime::Now().\n");
    return use_cycle_clock;
 }
 } // end anonymous namespace
 // WallTimeImp doesn't work when CPU Scaling is enabled. If CPU Scaling is
 // enabled at the start of the program then a std::chrono clock (see
 // ChooseClockType) is used instead.
 // Returns the current wall time in seconds. The time source is chosen once,
 // on the first call, by UseCpuCycleClock() and reused afterwards.
 WallTime Now()
 {
  static bool useCPUClock = UseCpuCycleClock();
  return useCPUClock ? CPUWalltimeNow() : ChronoWalltimeNow();
 }
 }  // end namespace walltime
 namespace {
 // Formats the current system_clock time as text. `local` selects local time
 // (true) or UTC (false). The format is "%x %X" on Windows and "%F %T"
 // elsewhere.
 std::string DateTimeString(bool local) {
  typedef std::chrono::system_clock Clock;
  std::time_t now = Clock::to_time_t(Clock::now());
  char storage[128];
  std::size_t written;
 #if defined(BENCHMARK_OS_WINDOWS)
  if (local) {
    written = std::strftime(storage, sizeof(storage), "%x %X", ::localtime(&now));
  } else {
    written = std::strftime(storage, sizeof(storage), "%x %X", ::gmtime(&now));
  }
 #else
  // Both variants fill a zero-initialised tm through the reentrant converters
  // and then share a single strftime call.
  std::tm timeinfo;
  std::memset(&timeinfo, 0, sizeof(std::tm));
  if (local) {
    ::localtime_r(&now, &timeinfo);
  } else {
    ::gmtime_r(&now, &timeinfo);
  }
  written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo);
 #endif
  CHECK(written < arraysize(storage));
  ((void)written); // prevent unused variable in optimized mode.
  return std::string(storage);
 }
 } // end namespace
 // Returns the current local date and time as text; forwards to
 // DateTimeString with local == true.
 std::string LocalDateTimeString() {
  return DateTimeString(true);
 }
 }  // end namespace benchmark
--- a/utils/google-benchmark/src/walltime.h
+++ b/utils/google-benchmark/src/walltime.h
@@ -0,0 +1,17 @@
 #ifndef BENCHMARK_WALLTIME_H_
 #define BENCHMARK_WALLTIME_H_
 #include <string>
 namespace benchmark {
 // Wall-clock time expressed in seconds as a floating-point value.
 typedef double WallTime;
 namespace walltime {
 // Returns the current wall time in seconds.
 WallTime Now();
 }  // end namespace walltime
 // Returns the current local date and time formatted as a string.
 std::string LocalDateTimeString();
 }  // end namespace benchmark
 #endif  // BENCHMARK_WALLTIME_H_
--- a/utils/google-benchmark/test/CMakeLists.txt
+++ b/utils/google-benchmark/test/CMakeLists.txt
@@ -0,0 +1,111 @@
 # Enable the tests
 # Thread support is required: every test executable links against
 # CMAKE_THREAD_LIBS_INIT.
 find_package(Threads REQUIRED)
 # compile_benchmark_test(<name>)
 #   Builds executable <name> from "<name>.cc" in this directory and links it
 #   against the `benchmark` library plus the platform thread library.
 macro(compile_benchmark_test name)
  add_executable(${name} "${name}.cc")
  target_link_libraries(${name} benchmark ${CMAKE_THREAD_LIBS_INIT})
 endmacro(compile_benchmark_test)
 # Demonstration executable
 compile_benchmark_test(benchmark_test)
 # --benchmark_min_time=0.01 is passed to every registered test in this file.
 add_test(benchmark benchmark_test --benchmark_min_time=0.01)
 compile_benchmark_test(filter_test)
 # add_filter_test(<name> <filter> <expect>)
 #   Registers two ctest entries that run filter_test with
 #   --benchmark_filter=<filter>: "<name>" executes the benchmarks and
 #   "<name>_list_only" only lists them (--benchmark_list_tests). <expect> is
 #   passed through as a trailing positional argument.
 #   NOTE(review): <expect> is presumably the number of benchmarks the filter
 #   should match -- confirm in filter_test.cc.
 macro(add_filter_test name filter expect)
  add_test(${name} filter_test --benchmark_min_time=0.01 --benchmark_filter=${filter} ${expect})
  add_test(${name}_list_only filter_test --benchmark_list_tests --benchmark_filter=${filter} ${expect})
 endmacro(add_filter_test)
 add_filter_test(filter_simple "Foo" 3)
 add_filter_test(filter_suffix "BM_.*" 4)
 add_filter_test(filter_regex_all ".*" 5)
 add_filter_test(filter_regex_blank "" 5)
 add_filter_test(filter_regex_none "monkey" 0)
 add_filter_test(filter_regex_wildcard ".*Foo.*" 3)
 add_filter_test(filter_regex_begin "^BM_.*" 4)
 add_filter_test(filter_regex_begin2 "^N" 1)
 add_filter_test(filter_regex_end ".*Ba$" 1)
 # One executable and one ctest entry per remaining test source. Each pair
 # builds <name>.cc and runs it with a short minimum benchmark time.
 compile_benchmark_test(options_test)
 add_test(options_benchmarks options_test --benchmark_min_time=0.01)
 compile_benchmark_test(basic_test)
 add_test(basic_benchmark basic_test --benchmark_min_time=0.01)
 compile_benchmark_test(diagnostics_test)
 add_test(diagnostics_test diagnostics_test --benchmark_min_time=0.01)
 compile_benchmark_test(skip_with_error_test)
 add_test(skip_with_error_test skip_with_error_test --benchmark_min_time=0.01)
 compile_benchmark_test(donotoptimize_test)
 add_test(donotoptimize_test donotoptimize_test --benchmark_min_time=0.01)
 compile_benchmark_test(fixture_test)
 add_test(fixture_test fixture_test --benchmark_min_time=0.01)
 compile_benchmark_test(map_test)
 add_test(map_test map_test --benchmark_min_time=0.01)
 compile_benchmark_test(reporter_output_test)
 add_test(reporter_output_test reporter_output_test --benchmark_min_time=0.01)
 # cxx03_test is only built when the compiler accepts -std=c++03. Its compile
 # flags are a copy of CMAKE_CXX_FLAGS with any -std=c++11 / -std=c++0x
 # replaced by -std=c++03.
 check_cxx_compiler_flag(-std=c++03 BENCHMARK_HAS_CXX03_FLAG)
 if (BENCHMARK_HAS_CXX03_FLAG)
  set(CXX03_FLAGS "${CMAKE_CXX_FLAGS}")
  string(REPLACE "-std=c++11" "-std=c++03" CXX03_FLAGS "${CXX03_FLAGS}")
  string(REPLACE "-std=c++0x" "-std=c++03" CXX03_FLAGS "${CXX03_FLAGS}")
  compile_benchmark_test(cxx03_test)
  set_target_properties(cxx03_test
      PROPERTIES COMPILE_FLAGS "${CXX03_FLAGS}")
  add_test(cxx03 cxx03_test --benchmark_min_time=0.01)
 endif()
 compile_benchmark_test(complexity_test)
 add_test(complexity_benchmark complexity_test --benchmark_min_time=0.01)
 # Add the coverage command(s)
 #
 # When CMAKE_BUILD_TYPE contains "coverage" (case-insensitive) and gcov, lcov,
 # genhtml, ctest and the --coverage compiler flag are all available, a
 # `coverage` target is added that runs the tests under lcov and writes an HTML
 # report to <build>/lcov/index.html. Otherwise a warning lists what is missing.
 if(CMAKE_BUILD_TYPE)
  string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LOWER)
 endif()
 # The expansion is quoted so that an unset/empty build type yields an empty
 # string operand instead of a malformed if() with a missing left-hand side.
 if ("${CMAKE_BUILD_TYPE_LOWER}" MATCHES "coverage")
  find_program(GCOV gcov)
  find_program(LCOV lcov)
  find_program(GENHTML genhtml)
  find_program(CTEST ctest)
  if (GCOV AND LCOV AND GENHTML AND CTEST AND HAVE_CXX_FLAG_COVERAGE)
    # cxx03_test only exists when the compiler accepts -std=c++03, so it is
    # added to the dependency list conditionally.
    set(coverage_test_targets
      filter_test benchmark_test options_test basic_test fixture_test complexity_test)
    if (TARGET cxx03_test)
      list(APPEND coverage_test_targets cxx03_test)
    endif()
    add_custom_command(
      OUTPUT ${CMAKE_BINARY_DIR}/lcov/index.html
      COMMAND ${LCOV} -q -z -d .
      COMMAND ${LCOV} -q --no-external -c -b "${CMAKE_SOURCE_DIR}" -d . -o before.lcov -i
      COMMAND ${CTEST} --force-new-ctest-process
      COMMAND ${LCOV} -q --no-external -c -b "${CMAKE_SOURCE_DIR}" -d . -o after.lcov
      COMMAND ${LCOV} -q -a before.lcov -a after.lcov --output-file final.lcov
      COMMAND ${LCOV} -q -r final.lcov "'${CMAKE_SOURCE_DIR}/test/*'" -o final.lcov
      COMMAND ${GENHTML} final.lcov -o lcov --demangle-cpp --sort -p "${CMAKE_BINARY_DIR}" -t benchmark
      DEPENDS ${coverage_test_targets}
      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
      COMMENT "Running LCOV"
    )
    add_custom_target(coverage
      DEPENDS ${CMAKE_BINARY_DIR}/lcov/index.html
      COMMENT "LCOV report at lcov/index.html"
    )
    message(STATUS "Coverage command added")
  else()
    if (HAVE_CXX_FLAG_COVERAGE)
      set(CXX_FLAG_COVERAGE_MESSAGE supported)
    else()
      set(CXX_FLAG_COVERAGE_MESSAGE unavailable)
    endif()
    message(WARNING
      "Coverage not available:\n"
      "  gcov: ${GCOV}\n"
      "  lcov: ${LCOV}\n"
      "  genhtml: ${GENHTML}\n"
      "  ctest: ${CTEST}\n"
      "  --coverage flag: ${CXX_FLAG_COVERAGE_MESSAGE}")
  endif()
 endif()
--- a/utils/google-benchmark/test/basic_test.cc
+++ b/utils/google-benchmark/test/basic_test.cc
@@ -0,0 +1,102 @@
 #include "benchmark/benchmark_api.h"
 // Registers benchmark `x` with three arguments: 8, 512 and 8192. Further
 // configuration calls can be chained onto the expansion.
 #define BASIC_BENCHMARK_TEST(x) \
    BENCHMARK(x)->Arg(8)->Arg(512)->Arg(8192)
 // Benchmark whose loop body only reads the iteration count; DoNotOptimize
 // keeps that read from being optimized away. Registered single-threaded and
 // with one thread per CPU.
 void BM_empty(benchmark::State& state) {
  while (state.KeepRunning()) {
    benchmark::DoNotOptimize(state.iterations());
  }
 }
 BENCHMARK(BM_empty);
 BENCHMARK(BM_empty)->ThreadPerCpu();
 // Spins range_x() times inside every timed iteration.
 void BM_spin_empty(benchmark::State& state) {
  while (state.KeepRunning()) {
    for (int x = 0; x < state.range_x(); ++x) {
      benchmark::DoNotOptimize(x);
    }
  }
 }
 BASIC_BENCHMARK_TEST(BM_spin_empty);
 BASIC_BENCHMARK_TEST(BM_spin_empty)->ThreadPerCpu();
 // Same spin as BM_spin_empty, preceded by one extra spin that runs before the
 // KeepRunning() loop is entered.
 void BM_spin_pause_before(benchmark::State& state) {
  // Work done before the benchmark loop starts.
  for (int i = 0; i < state.range_x(); ++i) {
    benchmark::DoNotOptimize(i);
  }
  while(state.KeepRunning()) {
    for (int i = 0; i < state.range_x(); ++i) {
      benchmark::DoNotOptimize(i);
    }
  }
 }
 BASIC_BENCHMARK_TEST(BM_spin_pause_before);
 BASIC_BENCHMARK_TEST(BM_spin_pause_before)->ThreadPerCpu();
 // Each iteration spins twice: once between PauseTiming() and ResumeTiming(),
 // and once after timing has been resumed.
 void BM_spin_pause_during(benchmark::State& state) {
  while(state.KeepRunning()) {
    state.PauseTiming();
    // Spin while timing is paused.
    for (int i = 0; i < state.range_x(); ++i) {
      benchmark::DoNotOptimize(i);
    }
    state.ResumeTiming();
    // Spin with timing running.
    for (int i = 0; i < state.range_x(); ++i) {
      benchmark::DoNotOptimize(i);
    }
  }
 }
 BASIC_BENCHMARK_TEST(BM_spin_pause_during);
 BASIC_BENCHMARK_TEST(BM_spin_pause_during)->ThreadPerCpu();
 // Each iteration only pauses and immediately resumes timing. Registered in
 // all four combinations of CPU-time/real-time and single-thread/per-CPU
 // threads.
 void BM_pause_during(benchmark::State& state) {
  while(state.KeepRunning()) {
    state.PauseTiming();
    state.ResumeTiming();
  }
 }
 BENCHMARK(BM_pause_during);
 BENCHMARK(BM_pause_during)->ThreadPerCpu();
 BENCHMARK(BM_pause_during)->UseRealTime();
 BENCHMARK(BM_pause_during)->UseRealTime()->ThreadPerCpu();
 // Same spin as BM_spin_empty, followed by one extra spin that runs after the
 // KeepRunning() loop has finished.
 void BM_spin_pause_after(benchmark::State& state) {
  while(state.KeepRunning()) {
    for (int i = 0; i < state.range_x(); ++i) {
      benchmark::DoNotOptimize(i);
    }
  }
  // Work done after the benchmark loop ends.
  for (int i = 0; i < state.range_x(); ++i) {
    benchmark::DoNotOptimize(i);
  }
 }
 BASIC_BENCHMARK_TEST(BM_spin_pause_after);
 BASIC_BENCHMARK_TEST(BM_spin_pause_after)->ThreadPerCpu();
 // Combines BM_spin_pause_before and BM_spin_pause_after: one spin before the
 // KeepRunning() loop, one per iteration, and one after the loop.
 void BM_spin_pause_before_and_after(benchmark::State& state) {
  // Work done before the benchmark loop starts.
  for (int i = 0; i < state.range_x(); ++i) {
    benchmark::DoNotOptimize(i);
  }
  while(state.KeepRunning()) {
    for (int i = 0; i < state.range_x(); ++i) {
      benchmark::DoNotOptimize(i);
    }
  }
  // Work done after the benchmark loop ends.
  for (int i = 0; i < state.range_x(); ++i) {
    benchmark::DoNotOptimize(i);
  }
 }
 BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after);
 BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after)->ThreadPerCpu();
 // Benchmark with a completely empty loop body: only KeepRunning() itself is
 // executed.
 void BM_empty_stop_start(benchmark::State& state) {
  while (state.KeepRunning()) { }
 }
 BENCHMARK(BM_empty_stop_start);
 BENCHMARK(BM_empty_stop_start)->ThreadPerCpu();
 BENCHMARK_MAIN()
--- a/utils/google-benchmark/test/benchmark_test.cc
+++ b/utils/google-benchmark/test/benchmark_test.cc
@@ -0,0 +1,224 @@
 #include "benchmark/benchmark.h"
 #include <assert.h>
 #include <math.h>
 #include <stdint.h>
 #include <cstdlib>
 #include <iostream>
 #include <limits>
 #include <list>
 #include <map>
 #include <mutex>
 #include <set>
 #include <sstream>
 #include <string>
 #include <vector>
 #include <chrono>
 #include <thread>
 #include <utility>
 // BENCHMARK_NOINLINE marks a function as non-inlinable on compilers that
 // define __GNUC__; elsewhere it expands to nothing.
 #if defined(__GNUC__)
 # define BENCHMARK_NOINLINE __attribute__((noinline))
 #else
 # define BENCHMARK_NOINLINE
 #endif
 namespace {
 // Recursively computes n! (0! and 1! are both 1). Kept out of line so each
 // call stays a real call inside the benchmark loop. The result is returned as
 // int, so only small n produce a meaningful value.
 int BENCHMARK_NOINLINE Factorial(uint32_t n) {
  // `n <= 1` rather than `n == 1`: with an unsigned argument, n == 0 would
  // otherwise wrap to UINT32_MAX and recurse without reaching the base case.
  return (n <= 1) ? 1 : n * Factorial(n - 1);
 }
 // Approximates pi from the first `depth` terms of an alternating series over
 // odd denominators. Term 0 evaluates to (-1)/(-1) = 1, which is why 1.0 is
 // subtracted from the sum before scaling by 4.
 double CalculatePi(int depth) {
  double sum = 0.0;
  int term = 0;
  while (term < depth) {
    // Even terms carry a negative sign, odd terms a positive one.
    const double sign = (term % 2 == 0) ? -1.0 : 1.0;
    const double odd = static_cast<double>((2 * term) - 1);
    sum += sign / odd;
    ++term;
  }
  return 4 * (sum - 1.0);
 }
 // Builds a set holding the integers 0 .. size-1 (despite the name, the
 // contents are sequential, not random). A non-positive size yields an empty
 // set.
 std::set<int> ConstructRandomSet(int size) {
  std::set<int> result;
  int value = 0;
  while (value < size) {
    result.insert(value);
    ++value;
  }
  return result;
 }
 std::mutex test_vector_mu;
 std::vector<int>* test_vector = nullptr;
 }  // end namespace
 // Times Factorial(8). The last result is attached to the run as its label.
 // Registered twice: with the default timer and with UseRealTime().
 static void BM_Factorial(benchmark::State& state) {
  // Note: despite the name, this holds Factorial(8), not 42!.
  int fac_42 = 0;
  while (state.KeepRunning())
    fac_42 = Factorial(8);
  // Prevent compiler optimizations
  std::stringstream ss;
  ss << fac_42;
  state.SetLabel(ss.str());
 }
 BENCHMARK(BM_Factorial);
 BENCHMARK(BM_Factorial)->UseRealTime();
 // Times CalculatePi with the depth given by range_x(), which is swept from 1
 // to 1024 * 1024 by BENCHMARK_RANGE. The last computed value becomes the run
 // label.
 static void BM_CalculatePiRange(benchmark::State& state) {
  double pi = 0.0;
  while (state.KeepRunning())
    pi = CalculatePi(state.range_x());
  std::stringstream ss;
  ss << pi;
  state.SetLabel(ss.str());
 }
 BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024);
 // Times CalculatePi at a fixed depth of 1024. Registered with 8 threads, with
 // a thread range of 1..32, and with one thread per CPU.
 static void BM_CalculatePi(benchmark::State& state) {
  static const int depth = 1024;
  while (state.KeepRunning()) {
    benchmark::DoNotOptimize(CalculatePi(depth));
  }
 }
 BENCHMARK(BM_CalculatePi)->Threads(8);
 BENCHMARK(BM_CalculatePi)->ThreadRange(1, 32);
 BENCHMARK(BM_CalculatePi)->ThreadPerCpu();
 // Times range_y() insertions of rand() values into a set that was pre-filled
 // with range_x() elements. Building the set happens with timing paused.
 static void BM_SetInsert(benchmark::State& state) {
  while (state.KeepRunning()) {
    state.PauseTiming();
    std::set<int> data = ConstructRandomSet(state.range_x());
    state.ResumeTiming();
    for (int j = 0; j < state.range_y(); ++j)
      data.insert(rand());
  }
  // Report throughput: range_y() inserted ints per iteration.
  state.SetItemsProcessed(state.iterations() * state.range_y());
  state.SetBytesProcessed(state.iterations() * state.range_y() * sizeof(int));
 }
 BENCHMARK(BM_SetInsert)->RangePair(1<<10,8<<10, 1,10);
 // Times appending range_x() copies of a value to a freshly constructed
 // Container via push_back. Reports items and bytes processed accordingly.
 //
 // Template parameters:
 //   Container - sequence type providing push_back.
 //   ValueType - element type; defaults to Container::value_type.
 template<typename Container, typename ValueType = typename Container::value_type>
 static void BM_Sequential(benchmark::State& state) {
  ValueType v = 42;
  // Read once so the loop bound is not re-queried inside the timed region.
  const int count = state.range_x();
  while (state.KeepRunning()) {
    Container c;
    // Push exactly `count` elements so the work matches the item count
    // reported below (a pre-decrementing countdown pushes one fewer and does
    // not terminate normally when the count is 0).
    for (int i = 0; i < count; ++i)
      c.push_back(v);
  }
  const size_t items_processed = state.iterations() * state.range_x();
  state.SetItemsProcessed(items_processed);
  state.SetBytesProcessed(items_processed * sizeof(v));
 }
 BENCHMARK_TEMPLATE2(BM_Sequential, std::vector<int>, int)->Range(1 << 0, 1 << 10);
 BENCHMARK_TEMPLATE(BM_Sequential, std::list<int>)->Range(1 << 0, 1 << 10);
 // Test the variadic version of BENCHMARK_TEMPLATE in C++11 and beyond.
 #if __cplusplus >= 201103L
 BENCHMARK_TEMPLATE(BM_Sequential, std::vector<int>, int)->Arg(512);
 #endif
 // Times std::string::compare on two equal strings of range_x() '-'
 // characters each.
 static void BM_StringCompare(benchmark::State& state) {
  std::string s1(state.range_x(), '-');
  std::string s2(state.range_x(), '-');
  while (state.KeepRunning())
    benchmark::DoNotOptimize(s1.compare(s2));
 }
 BENCHMARK(BM_StringCompare)->Range(1, 1<<20);
 // Demonstrates per-benchmark setup and teardown performed by thread 0 around
 // a multi-threaded loop. Thread 0 allocates the shared test_vector before the
 // loop and deletes it afterwards; inside the loop every thread alternates
 // push_back and pop_back on it while holding test_vector_mu.
 static void BM_SetupTeardown(benchmark::State& state) {
  if (state.thread_index == 0) {
    // No need to lock test_vector_mu here as this is running single-threaded.
    test_vector = new std::vector<int>();
  }
  int i = 0;
  while (state.KeepRunning()) {
    std::lock_guard<std::mutex> l(test_vector_mu);
    // Even steps push, odd steps pop, so each thread pops only what it has
    // previously pushed.
    if (i%2 == 0)
      test_vector->push_back(i);
    else
      test_vector->pop_back();
    ++i;
  }
  if (state.thread_index == 0) {
    delete test_vector;
  }
 }
 BENCHMARK(BM_SetupTeardown)->ThreadPerCpu();
 // Long-running benchmark: each iteration accumulates range_x() values into a
 // double, with range_x() swept from 1<<16 to 1<<28.
 static void BM_LongTest(benchmark::State& state) {
  double tracker = 0.0;
  while (state.KeepRunning()) {
    for (int i = 0; i < state.range_x(); ++i)
      benchmark::DoNotOptimize(tracker += i);
  }
 }
 BENCHMARK(BM_LongTest)->Range(1<<16,1<<28);
 // Each thread repeatedly writes 1 into its own slice of a shared vector.
 // range_x() is the buffer size in bytes; the vector holds
 // range_x() / sizeof(int) ints, split evenly across state.threads threads.
 // Thread 0 allocates the vector before the loop and deletes it afterwards.
 static void BM_ParallelMemset(benchmark::State& state) {
  int size = state.range_x() / sizeof(int);
  int thread_size = size / state.threads;
  // Half-open slice [from, to) owned by this thread.
  int from = thread_size * state.thread_index;
  int to = from + thread_size;
  if (state.thread_index == 0) {
    test_vector = new std::vector<int>(size);
  }
  while (state.KeepRunning()) {
    for (int i = from; i < to; i++) {
      // No need to lock test_vector_mu as ranges
      // do not overlap between threads.
      benchmark::DoNotOptimize(test_vector->at(i) = 1);
    }
  }
  if (state.thread_index == 0) {
    delete test_vector;
  }
 }
 BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4);
 // Demonstrates manual timing: each iteration sleeps for range_x()
 // microseconds, measures the sleep with high_resolution_clock and reports the
 // measured duration (in seconds) through SetIterationTime(). Registered once
 // with UseRealTime() and once with UseManualTime().
 static void BM_ManualTiming(benchmark::State& state) {
  // Total requested sleep time, in microseconds, across all iterations.
  size_t slept_for = 0;
  int microseconds = state.range_x();
  std::chrono::duration<double, std::micro> sleep_duration {
    static_cast<double>(microseconds)
  };
  while (state.KeepRunning()) {
    auto start   = std::chrono::high_resolution_clock::now();
    // Simulate some useful workload with a sleep
    std::this_thread::sleep_for(std::chrono::duration_cast<
      std::chrono::nanoseconds>(sleep_duration));
    auto end     = std::chrono::high_resolution_clock::now();
    // Convert the measured interval to floating-point seconds.
    auto elapsed =
      std::chrono::duration_cast<std::chrono::duration<double>>(
        end - start);
    state.SetIterationTime(elapsed.count());
    slept_for += microseconds;
  }
  state.SetItemsProcessed(slept_for);
 }
 BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseRealTime();
 BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseManualTime();
 // C++11-only: exercise BENCHMARK_CAPTURE, which registers a benchmark
 // together with extra arguments that are passed after `state`.
 #if __cplusplus >= 201103L
 // Accepts and ignores any number of extra arguments; the loop body is empty.
 template <class ...Args>
 void BM_with_args(benchmark::State& state, Args&&...) {
  while (state.KeepRunning()) {}
 }
 BENCHMARK_CAPTURE(BM_with_args, int_test, 42, 43, 44);
 BENCHMARK_CAPTURE(BM_with_args, string_and_pair_test,
                  std::string("abc"), std::pair<int, double>(42, 3.8));
 // Non-template variant taking a fixed (int, double) argument list; both
 // arguments are ignored.
 void BM_non_template_args(benchmark::State& state, int, double) {
  while(state.KeepRunning()) {}
 }
 BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0);
 #endif // __cplusplus >= 201103L
 BENCHMARK_MAIN()
--- a/utils/google-benchmark/test/complexity_test.cc
+++ b/utils/google-benchmark/test/complexity_test.cc
@@ -0,0 +1,297 @@
 #undef NDEBUG
 #include "benchmark/benchmark.h"
 #include "../src/check.h" // NOTE: check.h is for internal use only!
 #include "../src/re.h"    // NOTE: re.h is for internal use only
 #include <cassert>
 #include <cstring>
 #include <iostream>
 #include <sstream>
 #include <vector>
 #include <utility>
 #include <algorithm>
 #include <cmath>
 namespace {
 // ========================================================================= //
 // -------------------------- Testing Case --------------------------------- //
 // ========================================================================= //
 // How a TestCase searches the remaining reporter output for its regex.
 enum MatchRules {
  MR_Default, // Skip non-matching lines until a match is found.
  MR_Next    // Match must occur on the next line.
 };
 // One expected line of reporter output: a regular expression plus the rule
 // (a MatchRules value) describing where the matching line must appear.
 struct TestCase {
  std::string regex;
  int match_rule;
  TestCase(std::string re, int rule = MR_Default) : regex(re), match_rule(rule) {}
  // Consumes lines from `remaining_output` until one matches `regex`, then
  // returns, leaving the stream positioned after the matching line.
  // CHECK-fails if the regex cannot be compiled, if match_rule is MR_Next and
  // the first line read does not match, or if the output ends before any
  // match is found.
  void Check(std::stringstream& remaining_output) const {
    benchmark::Regex r;
    std::string err_str;
    r.Init(regex, &err_str);
    CHECK(err_str.empty()) << "Could not construct regex \"" << regex << "\""
                           << " got Error: " << err_str;
    std::string line;
    while (remaining_output.eof() == false) {
        CHECK(remaining_output.good());
        std::getline(remaining_output, line);
        if (r.Match(line)) return;
        // Reaching here means `line` did not match; under MR_Next that is an
        // immediate failure.
        CHECK(match_rule != MR_Next) << "Expected line \"" << line
                                     << "\" to match regex \"" << regex << "\"";
    }
    // The loop only exits normally once eof is set, so this reports the
    // missing match.
    CHECK(remaining_output.eof() == false)
        << "End of output reached before match for regex \"" << regex
        << "\" was found";
  }
 };
 std::vector<TestCase> ConsoleOutputTests;
 std::vector<TestCase> JSONOutputTests;
 std::vector<TestCase> CSVOutputTests;
 // ========================================================================= //
 // -------------------------- Test Helpers --------------------------------- //
 // ========================================================================= //
 // Reporter that fans every callback out to a list of underlying reporters so
 // that a single benchmark run feeds all of them. The reporters are not owned.
 class TestReporter : public benchmark::BenchmarkReporter {
 public:
  TestReporter(std::vector<benchmark::BenchmarkReporter*> reps)
      : reporters_(reps) {}
  // Forwards the context to every reporter and returns the value of the last
  // one (false when there are none). CHECK-fails if two consecutive reporters
  // disagree.
  virtual bool ReportContext(const Context& context) {
    bool result = false;
    for (std::size_t idx = 0; idx < reporters_.size(); ++idx) {
      const bool current = reporters_[idx]->ReportContext(context);
      CHECK(idx == 0 || current == result)
          << "Reports return different values for ReportContext";
      result = current;
    }
    return result;
  }
  // Forwards the runs to every reporter, in registration order.
  virtual void ReportRuns(const std::vector<Run>& report) {
    for (std::size_t idx = 0; idx < reporters_.size(); ++idx) {
      reporters_[idx]->ReportRuns(report);
    }
  }
  // Finalizes every reporter, in registration order.
  virtual void Finalize() {
    for (std::size_t idx = 0; idx < reporters_.size(); ++idx) {
      reporters_[idx]->Finalize();
    }
  }
 private:
  std::vector<benchmark::BenchmarkReporter*> reporters_;
 };
 // CONCAT pastes two tokens after macro-expanding them (CONCAT2 does the
 // actual pasting), so CONCAT(dummy, __LINE__) yields e.g. dummy42.
 #define CONCAT2(x, y) x##y
 #define CONCAT(x, y) CONCAT2(x, y)
 // ADD_CASES(...) calls AddCases at static-initialization time by using the
 // call as the initializer of a uniquely named dummy int.
 #define ADD_CASES(...) \
    int CONCAT(dummy, __LINE__) = AddCases(__VA_ARGS__)
 int AddCases(std::vector<TestCase>* out, std::initializer_list<TestCase> const& v) {
  for (auto const& TC : v)
    out->push_back(TC);
  return 0;
 }
 // join(a, b, c, ...) concatenates its arguments into one regex string,
 // placing "[ ]+" (one or more spaces) between consecutive arguments.
 // Base case: a single argument is returned converted to std::string.
 template <class First>
 std::string join(First f) { return f; }
 // Recursive case: first argument, separator, then the join of the rest.
 template <class First, class ...Args>
 std::string join(First f, Args&&... args) {
    std::string joined(std::move(f));
    joined += "[ ]+";
    joined += join(std::forward<Args>(args)...);
    return joined;
 }
 // Regex fragment matching a decimal number with a fractional part.
 std::string dec_re = "[0-9]+\\.[0-9]+";
 // ADD_COMPLEXITY_CASES(...) calls AddComplexityTest at static-initialization
 // time by using the call as the initializer of a uniquely named dummy int.
 #define ADD_COMPLEXITY_CASES(...) \
    int CONCAT(dummy, __LINE__) = AddComplexityTest(__VA_ARGS__)
 // Appends the expected output lines for one complexity benchmark to the
 // console, JSON and CSV expectation lists.
 //
 //   console_out, json_out, csv_out - lists to append to.
 //   big_o_test_name - name of the "_BigO" result row.
 //   rms_test_name   - name of the "_RMS" result row.
 //   big_o           - regex for the complexity label printed by the reporter.
 //
 // Always returns 0 so the call can be used as the initializer of a dummy int.
 int AddComplexityTest(std::vector<TestCase>* console_out, std::vector<TestCase>* json_out,
                      std::vector<TestCase>* csv_out, std::string big_o_test_name, 
                      std::string rms_test_name, std::string big_o) {
  // A coefficient followed by the complexity label, e.g. "<decimal> <big_o>".
  std::string big_o_str = dec_re + " " + big_o;
  // Console: name followed by two space-separated columns.
  AddCases(console_out, {
    {join("^" + big_o_test_name + "", big_o_str, big_o_str) + "[ ]*$"},
    {join("^" + rms_test_name + "", "[0-9]+ %", "[0-9]+ %") + "[ ]*$"}
  });
  // JSON: the name line may appear anywhere later in the output; the fields
  // marked MR_Next must follow on consecutive lines.
  AddCases(json_out, {
    {"\"name\": \"" + big_o_test_name + "\",$"},
    {"\"cpu_coefficient\": [0-9]+,$", MR_Next},
    {"\"real_coefficient\": [0-9]{1,5},$", MR_Next},
    {"\"big_o\": \"" + big_o + "\",$", MR_Next},
    {"\"time_unit\": \"ns\"$", MR_Next},
    {"}", MR_Next},
    {"\"name\": \"" + rms_test_name + "\",$"},
    {"\"rms\": [0-9]+%$", MR_Next},
    {"}", MR_Next}
  });
  // CSV: one row each for the BigO and RMS results.
  AddCases(csv_out, {
    {"^\"" + big_o_test_name + "\",," + dec_re + "," + dec_re + "," + big_o + ",,,,,$"},
    {"^\"" + rms_test_name + "\",," + dec_re + "," + dec_re + ",,,,,,$"}
  });
  return 0;
 }
 }  // end namespace
 // ========================================================================= //
 // --------------------------- Testing BigO O(1) --------------------------- //
 // ========================================================================= //
 // Benchmark with an empty loop body, so its running time does not depend on
 // N. Registered three times over the same range: with the o1 enum, with a
 // lambda returning a constant, and with automatic complexity detection.
 void BM_Complexity_O1(benchmark::State& state) {
  while (state.KeepRunning()) {
  }
  state.SetComplexityN(state.range_x());
 }
 BENCHMARK(BM_Complexity_O1) -> Range(1, 1<<18) -> Complexity(benchmark::o1);
 BENCHMARK(BM_Complexity_O1) -> Range(1, 1<<18) -> Complexity([](int){return 1.0; });
 BENCHMARK(BM_Complexity_O1) -> Range(1, 1<<18) -> Complexity();
 // Expected row names and complexity-label regexes for the O(1) runs.
 const char* big_o_1_test_name = "BM_Complexity_O1_BigO";
 const char* rms_o_1_test_name = "BM_Complexity_O1_RMS";
 const char* enum_auto_big_o_1 = "\\([0-9]+\\)";
 const char* lambda_big_o_1 = "f\\(N\\)";
 // Add enum tests
 ADD_COMPLEXITY_CASES(&ConsoleOutputTests, &JSONOutputTests, &CSVOutputTests, 
                     big_o_1_test_name, rms_o_1_test_name, enum_auto_big_o_1);
 // Add lambda tests
 ADD_COMPLEXITY_CASES(&ConsoleOutputTests, &JSONOutputTests, &CSVOutputTests, 
                     big_o_1_test_name, rms_o_1_test_name, lambda_big_o_1);
 // ========================================================================= //
 // --------------------------- Testing BigO O(N) --------------------------- //
 // ========================================================================= //
 // Returns a vector of `size` pseudo-random values drawn with rand(), each in
 // the range [0, size). A size of 0 yields an empty vector.
 std::vector<int> ConstructRandomVector(int size) {
  std::vector<int> values;
  values.reserve(size);
  int remaining = size;
  while (remaining-- > 0) {
    values.push_back(rand() % size);
  }
  return values;
 }
 // Linear-time benchmark: std::find over a vector of range_x() elements for a
 // value that cannot be present (all elements are < range_x()). Registered
 // with the oN enum, with a lambda returning n, and with automatic detection.
 void BM_Complexity_O_N(benchmark::State& state) {
  auto v = ConstructRandomVector(state.range_x());
  const int item_not_in_vector = state.range_x()*2; // Test worst case scenario (item not in vector)
  while (state.KeepRunning()) {
      benchmark::DoNotOptimize(std::find(v.begin(), v.end(), item_not_in_vector));
  }
  state.SetComplexityN(state.range_x());
 }
 BENCHMARK(BM_Complexity_O_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity(benchmark::oN);
 BENCHMARK(BM_Complexity_O_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity([](int n) -> double{return n; });
 BENCHMARK(BM_Complexity_O_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity();
 // Expected row names and complexity-label regexes for the O(N) runs.
 const char* big_o_n_test_name = "BM_Complexity_O_N_BigO";
 const char* rms_o_n_test_name = "BM_Complexity_O_N_RMS";
 const char* enum_auto_big_o_n = "N";
 const char* lambda_big_o_n = "f\\(N\\)";
 // Add enum tests
 ADD_COMPLEXITY_CASES(&ConsoleOutputTests, &JSONOutputTests, &CSVOutputTests, 
                     big_o_n_test_name, rms_o_n_test_name, enum_auto_big_o_n);
 // Add lambda tests
 ADD_COMPLEXITY_CASES(&ConsoleOutputTests, &JSONOutputTests, &CSVOutputTests, 
                     big_o_n_test_name, rms_o_n_test_name, lambda_big_o_n);
 // ========================================================================= //
 // ------------------------- Testing BigO O(N*lgN) ------------------------- //
 // ========================================================================= //
 // Benchmark built around std::sort on a vector of range_x() elements.
 // Registered with the oNLogN enum, with a lambda returning n * log2(n), and
 // with automatic detection. Note that the same vector is sorted in every
 // iteration, so after the first iteration the input is already sorted.
 static void BM_Complexity_O_N_log_N(benchmark::State& state) {
  auto v = ConstructRandomVector(state.range_x());
  while (state.KeepRunning()) {
      std::sort(v.begin(), v.end());
  }
  state.SetComplexityN(state.range_x());
 }
 BENCHMARK(BM_Complexity_O_N_log_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity(benchmark::oNLogN);
 BENCHMARK(BM_Complexity_O_N_log_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity([](int n) {return n * std::log2(n); });
 BENCHMARK(BM_Complexity_O_N_log_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity();
 // Expected row names and complexity-label regexes for the O(N lg N) runs.
 const char* big_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N_BigO";
 const char* rms_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N_RMS";
 const char* enum_auto_big_o_n_lg_n = "NlgN";
 const char* lambda_big_o_n_lg_n = "f\\(N\\)";
 // Add enum tests
 ADD_COMPLEXITY_CASES(&ConsoleOutputTests, &JSONOutputTests, &CSVOutputTests, 
                     big_o_n_lg_n_test_name, rms_o_n_lg_n_test_name, enum_auto_big_o_n_lg_n);
 // Add lambda tests
 ADD_COMPLEXITY_CASES(&ConsoleOutputTests, &JSONOutputTests, &CSVOutputTests, 
                     big_o_n_lg_n_test_name, rms_o_n_lg_n_test_name, lambda_big_o_n_lg_n);
 // ========================================================================= //
 // --------------------------- TEST CASES END ------------------------------ //
 // ========================================================================= //
 // Runs every registered benchmark once through the console, JSON and CSV
 // reporters simultaneously, capturing each reporter's output in its own
 // stream, then checks every captured output against the expected regex
 // cases. Returns 0 on success; a failed CHECK terminates the run.
 int main(int argc, char* argv[]) {
  // Add --color_print=false to argv since we don't want to match color codes.
  // The argument list is copied into a vector so that any number of
  // command-line arguments is handled without overrunning a fixed-size array.
  char new_arg[64];
  std::vector<char*> new_argv(argv, argv + argc);
  new_argv.push_back(std::strcpy(new_arg, "--color_print=false"));
  argc = static_cast<int>(new_argv.size());
  benchmark::Initialize(&argc, new_argv.data());
  benchmark::ConsoleReporter CR;
  benchmark::JSONReporter JR;
  benchmark::CSVReporter CSVR;
  // Bundles a reporter with the streams that capture its output and the list
  // of expectations to verify against that output.
  struct ReporterTest {
    const char* name;
    std::vector<TestCase>& output_cases;
    benchmark::BenchmarkReporter& reporter;
    std::stringstream out_stream;
    std::stringstream err_stream;
    ReporterTest(const char* n,
                 std::vector<TestCase>& out_tc,
                 benchmark::BenchmarkReporter& br)
        : name(n), output_cases(out_tc), reporter(br) {
        // Redirect the reporter into the capture streams.
        reporter.SetOutputStream(&out_stream);
        reporter.SetErrorStream(&err_stream);
    }
  } TestCases[] = {
      {"ConsoleReporter", ConsoleOutputTests, CR},
      {"JSONReporter", JSONOutputTests, JR},
      {"CSVReporter", CSVOutputTests, CSVR}
  };
  // Create the test reporter and run the benchmarks.
  std::cout << "Running benchmarks...\n";
  TestReporter test_rep({&CR, &JR, &CSVR});
  benchmark::RunSpecifiedBenchmarks(&test_rep);
  for (auto& rep_test : TestCases) {
      std::string msg = std::string("\nTesting ") + rep_test.name + " Output\n";
      std::string banner(msg.size() - 1, '-');
      std::cout << banner << msg << banner << "\n";
      // Echo the captured output before verifying it, to aid debugging.
      std::cerr << rep_test.err_stream.str();
      std::cout << rep_test.out_stream.str();
      // Each case consumes lines from the stream, so cases must match in
      // order.
      for (const auto& TC : rep_test.output_cases)
        TC.Check(rep_test.out_stream);
      std::cout << "\n";
  }
  return 0;
 }
--- a/utils/google-benchmark/test/cxx03_test.cc
+++ b/utils/google-benchmark/test/cxx03_test.cc
@@ -0,0 +1,31 @@
 #include <cstddef>
 #include "benchmark/benchmark.h"
 #if __cplusplus >= 201103L
 #error C++11 or greater detected. Should be C++03.
 #endif
 // Smallest possible benchmark body: each iteration only reads the current
 // iteration count. The template benchmarks in this file delegate to it.
 void BM_empty(benchmark::State& state) {
    for (; state.KeepRunning();) {
        // The volatile store gives the loop body an observable effect.
        volatile std::size_t iteration_count = state.iterations();
        (void)iteration_count;
    }
 }
 BENCHMARK(BM_empty);
 // Two-parameter template benchmark. T and U are not used by the body, which
 // simply forwards to BM_empty; registered via BENCHMARK_TEMPLATE2.
 template <class T, class U>
 void BM_template2(benchmark::State& state) {
    BM_empty(state);
 }
 BENCHMARK_TEMPLATE2(BM_template2, int, long);
 // One-parameter template benchmark. T is not used by the body, which simply
 // forwards to BM_empty. It is registered once through BENCHMARK_TEMPLATE and
 // once through BENCHMARK_TEMPLATE1.
 template <class T>
 void BM_template1(benchmark::State& state) {
    BM_empty(state);
 }
 BENCHMARK_TEMPLATE(BM_template1, long);
 BENCHMARK_TEMPLATE1(BM_template1, int);
 BENCHMARK_MAIN()
--- a/utils/google-benchmark/test/diagnostics_test.cc
+++ b/utils/google-benchmark/test/diagnostics_test.cc
@@ -0,0 +1,61 @@
 // Testing:
 //   State::PauseTiming()
 //   State::ResumeTiming()
 // Test that the CHECKs within these functions diagnose calls made
 // outside of the KeepRunning() loop.
 //
 // NOTE: Users should NOT include or use src/check.h. This is only done in
 // order to test library internals.
 #include "benchmark/benchmark_api.h"
 #include "../src/check.h"
 #include <stdexcept>
 #include <cstdlib>
 #if defined(__GNUC__) && !defined(__EXCEPTIONS)
 #define TEST_HAS_NO_EXCEPTIONS
 #endif
 // Abort handler installed by main() in this file. When exceptions are
 // available it throws std::logic_error so the caller can catch the failure;
 // otherwise it aborts the process.
 void TestHandler() {
 #ifndef TEST_HAS_NO_EXCEPTIONS
  throw std::logic_error("");
 #else
  std::abort();
 #endif
 }
 // Calls PauseTiming() and ResumeTiming() on `state` and expects each call to
 // end in a std::logic_error; if a call returns normally the process is
 // aborted. The body is compiled only when NDEBUG is not defined and
 // exceptions are available; otherwise the function does nothing.
 void try_invalid_pause_resume(benchmark::State& state) {
 #if !defined(NDEBUG) && !defined(TEST_HAS_NO_EXCEPTIONS)
  try {
    state.PauseTiming();
    // Reached only if PauseTiming() did not throw.
    std::abort();
  } catch (std::logic_error const&) {}
  try {
    state.ResumeTiming();
    // Reached only if ResumeTiming() did not throw.
    std::abort();
  } catch (std::logic_error const&) {}
 #else
  (void)state; // avoid unused warning
 #endif
 }
 // Benchmark that, on its first invocation only, attempts the invalid
 // pause/resume calls both before and after the KeepRunning() loop.
 void BM_diagnostic_test(benchmark::State& state) {
  static bool called_once = false;
  if (!called_once) {
    try_invalid_pause_resume(state);
  }
  while (state.KeepRunning()) {
    benchmark::DoNotOptimize(state.iterations());
  }
  if (!called_once) {
    try_invalid_pause_resume(state);
  }
  // Later invocations skip the diagnostic probes.
  called_once = true;
 }
 BENCHMARK(BM_diagnostic_test);
 // Installs TestHandler as the library's abort handler, then initializes the
 // library and runs the registered benchmarks.
 int main(int argc, char** argv) {
  benchmark::internal::GetAbortHandler() = &TestHandler;
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
  return 0;
 }
--- a/utils/google-benchmark/test/donotoptimize_test.cc
+++ b/utils/google-benchmark/test/donotoptimize_test.cc
@@ -0,0 +1,36 @@
 #include "benchmark/benchmark.h"
 #include <cstdint>
 namespace {
 #if defined(__GNUC__)
  // On GNU-compatible compilers the function is declared with the `const`
  // attribute before it is defined.
  std::uint64_t double_up(const std::uint64_t x) __attribute__ ((const));
 #endif
  // Returns twice `x`; unsigned arithmetic wraps on overflow.
  std::uint64_t double_up(const std::uint64_t x) {
    return x + x;
  }
 }
 // Compile-coverage test: passes arrays of several sizes, pointers, an int
 // lvalue, an assignment expression and a function result to
 // benchmark::DoNotOptimize(). Nothing is asserted at run time.
 int main(int, char*[]) {
  // this test verifies compilation of DoNotOptimize() for some types
  char buffer8[8];
  benchmark::DoNotOptimize(buffer8);
  char buffer20[20];
  benchmark::DoNotOptimize(buffer20);
  char buffer1024[1024];
  benchmark::DoNotOptimize(buffer1024);
  benchmark::DoNotOptimize(&buffer1024[0]);
  int x = 123;
  benchmark::DoNotOptimize(x);
  benchmark::DoNotOptimize(&x);
  benchmark::DoNotOptimize(x += 42);
  benchmark::DoNotOptimize(double_up(x));
  return 0;
 }
--- a/utils/google-benchmark/test/filter_test.cc
+++ b/utils/google-benchmark/test/filter_test.cc
@@ -0,0 +1,105 @@
 #include "benchmark/benchmark.h"
 #include <cassert>
 #include <cmath>
 #include <cstdint>
 #include <cstdlib>
 #include <iostream>
 #include <limits>
 #include <sstream>
 #include <string>
 namespace {
 class TestReporter : public benchmark::ConsoleReporter {
 public:
  virtual bool ReportContext(const Context& context) {
    return ConsoleReporter::ReportContext(context);
  };
  virtual void ReportRuns(const std::vector<Run>& report) {
    ++count_;
    ConsoleReporter::ReportRuns(report);
  };
  TestReporter() : count_(0) {}
  virtual ~TestReporter() {}
  size_t GetCount() const {
    return count_;
  }
 private:
  mutable size_t count_;
 };
 }  // end namespace
 // The five benchmarks below all have an empty KeepRunning() loop; they differ
 // only in name (NoPrefix, BM_Foo, BM_Bar, BM_FooBar, BM_FooBa). main() in this
 // file compares the number of benchmarks that ran against an expected count
 // supplied on the command line.
 static void NoPrefix(benchmark::State& state) {
  while (state.KeepRunning()) {}
 }
 BENCHMARK(NoPrefix);
 static void BM_Foo(benchmark::State& state) {
  while (state.KeepRunning()) {}
 }
 BENCHMARK(BM_Foo);
 static void BM_Bar(benchmark::State& state) {
  while (state.KeepRunning()) {}
 }
 BENCHMARK(BM_Bar);
 static void BM_FooBar(benchmark::State& state) {
  while (state.KeepRunning()) {}
 }
 BENCHMARK(BM_FooBar);
 static void BM_FooBa(benchmark::State& state) {
  while (state.KeepRunning()) {}
 }
 BENCHMARK(BM_FooBa);
 // Runs the benchmarks selected by the command-line flags and, when exactly
 // one argument is left after benchmark::Initialize(), treats it as the
 // expected number of matched benchmarks.
 //
 // Returns 0 on success and -1 when the expected count cannot be parsed, when
 // the value returned by RunSpecifiedBenchmarks() differs from it, or when the
 // number of ReportRuns() calls differs from what is expected.
 int main(int argc, char** argv) {
  // Remember whether a list-only flag was passed before Initialize() gets to
  // process argv.
  bool list_only = false;
  for (int i=0; i < argc; ++i)
    list_only |= std::string(argv[i]).find("--benchmark_list_tests") != std::string::npos;
  benchmark::Initialize(&argc, argv);
  TestReporter test_reporter;
  const size_t returned_count = benchmark::RunSpecifiedBenchmarks(&test_reporter);
  if (argc == 2) {
    // Make sure we ran all of the tests
    std::stringstream ss(argv[1]);
    size_t expected_return;
    // A malformed argument must fail the test instead of silently being
    // treated as an expected count of zero.
    if (!(ss >> expected_return)) {
      std::cerr << "ERROR: Could not parse expected test count from \""
                << argv[1] << "\"" << std::endl;
      return -1;
    }
    if (returned_count != expected_return) {
      std::cerr << "ERROR: Expected " << expected_return
                << " tests to match the filter but returned_count = "
                << returned_count << std::endl;
      return -1;
    }
    // In list-only mode no reports are expected at all.
    const size_t expected_reports = list_only ? 0 : expected_return;
    const size_t reports_count = test_reporter.GetCount();
    if (reports_count != expected_reports) {
      std::cerr << "ERROR: Expected " << expected_reports
                << " tests to be run but reported_count = " << reports_count
                << std::endl;
      return -1;
    }
  }
  return 0;
 }
--- a/utils/google-benchmark/test/fixture_test.cc
+++ b/utils/google-benchmark/test/fixture_test.cc
@@ -0,0 +1,52 @@
 #include "benchmark/benchmark.h"
 #include <cassert>
 #include <memory>
 // Fixture owning a heap-allocated int. Thread 0 creates it (value 42) in
 // SetUp() and releases it in TearDown(); the asserts verify that the two
 // calls are paired and that nothing is left allocated at destruction.
 class MyFixture : public ::benchmark::Fixture {
 public:
  void SetUp(const ::benchmark::State& state) {
    // Only thread 0 manages the shared data.
    if (state.thread_index != 0) return;
    assert(!data);
    data.reset(new int(42));
  }

  void TearDown(const ::benchmark::State& state) {
    // Only thread 0 manages the shared data.
    if (state.thread_index != 0) return;
    assert(data != nullptr);
    data.reset();
  }

  ~MyFixture() { assert(!data); }

  std::unique_ptr<int> data;
 };
 // Fixture benchmark defined with BENCHMARK_F. Asserts that the fixture's
 // data is present and equal to 42 before running an empty timing loop.
 BENCHMARK_F(MyFixture, Foo)(benchmark::State& st) {
  assert(data.get() != nullptr);
  assert(*data == 42);
  while (st.KeepRunning()) {
  }
 }
 // Fixture benchmark defined with BENCHMARK_DEFINE_F and registered twice
 // below: once with Arg(42), once with Arg(42) plus ThreadPerCpu(). Checks the
 // fixture data on thread 0 before the loop and on every iteration, then
 // reports st.range_x() as the number of items processed.
 BENCHMARK_DEFINE_F(MyFixture, Bar)(benchmark::State& st) {
  if (st.thread_index == 0) {
    assert(data.get() != nullptr);
    assert(*data == 42);
  }
  while (st.KeepRunning()) {
    assert(data.get() != nullptr);
    assert(*data == 42);
  }
  st.SetItemsProcessed(st.range_x());
 }
 BENCHMARK_REGISTER_F(MyFixture, Bar)->Arg(42);
 BENCHMARK_REGISTER_F(MyFixture, Bar)->Arg(42)->ThreadPerCpu();
 BENCHMARK_MAIN()
--- a/utils/google-benchmark/test/map_test.cc
+++ b/utils/google-benchmark/test/map_test.cc
@@ -0,0 +1,58 @@
 #include "benchmark/benchmark.h"
 #include <cstdlib>
 #include <map>
 namespace {
 // Builds a map by attempting `size` insertions of pseudo-random (key, value)
 // pairs, with both key and value drawn from [0, size) using rand().
 // insert() ignores keys that are already present, so the result may hold
 // fewer than `size` entries. A `size` of zero or less yields an empty map.
 std::map<int, int> ConstructRandomMap(int size) {
  std::map<int, int> m;
  for (int i = 0; i < size; ++i) {
    // Draw the key before the value in separate statements. The evaluation
    // order of function arguments is unspecified, so calling rand() twice in
    // one argument list could give different maps on different compilers.
    const int key = rand() % size;
    const int value = rand() % size;
    m.insert(std::make_pair(key, value));
  }
  return m;
 }
 }  // namespace
 // Basic version.
 // Benchmarks std::map::find. Each iteration rebuilds a random map with timing
 // paused, then performs `size` lookups of random keys with timing resumed.
 // `size` is the value returned by state.range_x().
 static void BM_MapLookup(benchmark::State& state) {
  const int size = state.range_x();
  while (state.KeepRunning()) {
    // Map construction is excluded from the measurement.
    state.PauseTiming();
    std::map<int, int> m = ConstructRandomMap(size);
    state.ResumeTiming();
    for (int i = 0; i < size; ++i) {
      benchmark::DoNotOptimize(m.find(rand() % size));
    }
  }
  state.SetItemsProcessed(state.iterations() * size);
 }
 BENCHMARK(BM_MapLookup)->Range(1 << 3, 1 << 12);
 // Using fixtures.
 // Fixture that holds a random map: SetUp() fills `m` using st.range_x() as
 // the size argument to ConstructRandomMap(), and TearDown() empties it.
 class MapFixture : public ::benchmark::Fixture {
 public:
  void SetUp(const ::benchmark::State& st) {
    m = ConstructRandomMap(st.range_x());
  }
  void TearDown(const ::benchmark::State&) {
    m.clear();
  }
  // Map shared by the benchmark bodies defined on this fixture.
  std::map<int, int> m;
 };
 // Fixture variant of the lookup benchmark: the map `m` comes from the
 // fixture, so each iteration only performs `size` random-key lookups.
 BENCHMARK_DEFINE_F(MapFixture, Lookup)(benchmark::State& state) {
  const int size = state.range_x();
  while (state.KeepRunning()) {
    for (int i = 0; i < size; ++i) {
      benchmark::DoNotOptimize(m.find(rand() % size));
    }
  }
  state.SetItemsProcessed(state.iterations() * size);
 }
 BENCHMARK_REGISTER_F(MapFixture, Lookup)->Range(1<<3, 1<<12);
 BENCHMARK_MAIN()
--- a/utils/google-benchmark/test/options_test.cc
+++ b/utils/google-benchmark/test/options_test.cc
@@ -0,0 +1,44 @@
 #include "benchmark/benchmark_api.h"
 #include <chrono>
 #include <thread>
 // Benchmark with an empty timing loop; used below as the target for the
 // registration options.
 void BM_basic(benchmark::State& state) {
  while (state.KeepRunning()) {
  }
 }
 // Benchmark whose every iteration sleeps for state.range_x() milliseconds.
 void BM_basic_slow(benchmark::State& state) {
  const std::chrono::milliseconds sleep_ms(state.range_x());
  // The sleep length is fixed for the whole run, so convert it once.
  const std::chrono::nanoseconds sleep_ns =
      std::chrono::duration_cast<std::chrono::nanoseconds>(sleep_ms);
  while (state.KeepRunning()) {
    std::this_thread::sleep_for(sleep_ns);
  }
 }
 // Registrations: each line below applies a different option (or combination
 // of options) to BM_basic or BM_basic_slow.
 BENCHMARK(BM_basic);
 BENCHMARK(BM_basic)->Arg(42);
 // The slow variant is registered once per reporting time unit.
 BENCHMARK(BM_basic_slow)->Arg(10)->Unit(benchmark::kNanosecond);
 BENCHMARK(BM_basic_slow)->Arg(100)->Unit(benchmark::kMicrosecond);
 BENCHMARK(BM_basic_slow)->Arg(1000)->Unit(benchmark::kMillisecond);
 BENCHMARK(BM_basic)->Range(1, 8);
 BENCHMARK(BM_basic)->RangeMultiplier(2)->Range(1, 8);
 BENCHMARK(BM_basic)->DenseRange(10, 15);
 BENCHMARK(BM_basic)->ArgPair(42, 42);
 BENCHMARK(BM_basic)->RangePair(64, 512, 64, 512);
 BENCHMARK(BM_basic)->MinTime(0.7);
 BENCHMARK(BM_basic)->UseRealTime();
 BENCHMARK(BM_basic)->ThreadRange(2, 4);
 BENCHMARK(BM_basic)->ThreadPerCpu();
 BENCHMARK(BM_basic)->Repetitions(3);
 // Callback passed to Apply(): adds the arguments 0 through 9 to benchmark
 // `b`, one Arg() call per value.
 void CustomArgs(benchmark::internal::Benchmark* b) {
  for (int i = 0; i < 10; ++i) {
    b->Arg(i);
  }
 }
 BENCHMARK(BM_basic)->Apply(CustomArgs);
 BENCHMARK_MAIN()
--- a/utils/google-benchmark/test/reporter_output_test.cc
+++ b/utils/google-benchmark/test/reporter_output_test.cc
@@ -0,0 +1,259 @@
 #undef NDEBUG
 #include "benchmark/benchmark.h"
 #include "../src/check.h" // NOTE: check.h is for internal use only!
 #include "../src/re.h" // NOTE: re.h is for internal use only
 #include <cassert>
 #include <cstring>
 #include <iostream>
 #include <sstream>
 #include <vector>
 #include <utility>
 namespace {
 // ========================================================================= //
 // -------------------------- Testing Case --------------------------------- //
 // ========================================================================= //
 // Controls how TestCase::Check() treats lines that do not match its regex.
 enum MatchRules {
  MR_Default, // Skip non-matching lines until a match is found.
  MR_Next    // Match must occur on the next line.
 };
 struct TestCase {
  std::string regex;
  int match_rule;
  TestCase(std::string re, int rule = MR_Default) : regex(re), match_rule(rule) {}
  void Check(std::stringstream& remaining_output) const {
    benchmark::Regex r;
    std::string err_str;
    r.Init(regex, &err_str);
    CHECK(err_str.empty()) << "Could not construct regex \"" << regex << "\""
                           << " got Error: " << err_str;
    std::string line;
    while (remaining_output.eof() == false) {
        CHECK(remaining_output.good());
        std::getline(remaining_output, line);
        if (r.Match(line)) return;
        CHECK(match_rule != MR_Next) << "Expected line \"" << line
                                     << "\" to match regex \"" << regex << "\"";
    }
    CHECK(remaining_output.eof() == false)
        << "End of output reached before match for regex \"" << regex
        << "\" was found";
  }
 };
 // Expected-line lists, one pair (output stream, error stream) per reporter.
 // They are filled during static initialization by the ADD_CASES invocations
 // later in this file and read by main().
 std::vector<TestCase> ConsoleOutputTests;
 std::vector<TestCase> JSONOutputTests;
 std::vector<TestCase> CSVOutputTests;
 std::vector<TestCase> ConsoleErrorTests;
 std::vector<TestCase> JSONErrorTests;
 std::vector<TestCase> CSVErrorTests;
 // ========================================================================= //
 // -------------------------- Test Helpers --------------------------------- //
 // ========================================================================= //
 class TestReporter : public benchmark::BenchmarkReporter {
 public:
  TestReporter(std::vector<benchmark::BenchmarkReporter*> reps)
      : reporters_(reps)  {}
  virtual bool ReportContext(const Context& context) {
    bool last_ret = false;
    bool first = true;
    for (auto rep : reporters_) {
      bool new_ret = rep->ReportContext(context);
      CHECK(first || new_ret == last_ret)
          << "Reports return different values for ReportContext";
      first = false;
      last_ret = new_ret;
    }
    return last_ret;
  }
  virtual void ReportRuns(const std::vector<Run>& report) {
    for (auto rep : reporters_)
      rep->ReportRuns(report);
  }
  virtual void Finalize() {
      for (auto rep : reporters_)
        rep->Finalize();
  }
 private:
  std::vector<benchmark::BenchmarkReporter*> reporters_;
 };
 // ADD_CASES(list, {cases...}) defines a uniquely named dummy int (the name is
 // built from __LINE__) whose initializer calls AddCases(), so the cases are
 // appended while the dummy variable is being initialized. CONCAT goes
 // through CONCAT2 so that __LINE__ is expanded before token pasting.
 #define CONCAT2(x, y) x##y
 #define CONCAT(x, y) CONCAT2(x, y)
 #define ADD_CASES(...) \
    int CONCAT(dummy, __LINE__) = AddCases(__VA_ARGS__)
 int AddCases(std::vector<TestCase>* out, std::initializer_list<TestCase> const& v) {
  for (auto const& TC : v)
    out->push_back(TC);
  return 0;
 }
 // join(a, b, c, ...) concatenates its arguments into one regex string,
 // separating consecutive arguments with "[ ]+" (one or more spaces).
 // Base case: a single argument is returned as a string on its own.
 template <class Only>
 std::string join(Only only) { return only; }
 // Recursive case: emit the head and the separator, then join the rest.
 template <class Head, class ...Rest>
 std::string join(Head head, Rest&&... rest) {
    std::string joined(std::move(head));
    joined += "[ ]+";
    joined += join(std::forward<Rest>(rest)...);
    return joined;
 }
 // Regex fragment for a decimal number: digits, a literal dot, digits.
 std::string dec_re = "[0-9]+\\.[0-9]+";
 }  // end namespace
 // ========================================================================= //
 // ---------------------- Testing Prologue Output -------------------------- //
 // ========================================================================= //
 // Console output: the column-header line followed by a line of dashes, each
 // required on the very next line of output (MR_Next).
 ADD_CASES(&ConsoleOutputTests, {
    {join("^Benchmark", "Time", "CPU", "Iterations$"), MR_Next},
    {"^[-]+$", MR_Next}
 });
 // CSV output: the header row listing the column names.
 ADD_CASES(&CSVOutputTests, {
  {"name,iterations,real_time,cpu_time,time_unit,bytes_per_second,items_per_second,"
    "label,error_occurred,error_message"}
 });
 // ========================================================================= //
 // ------------------------ Testing Basic Output --------------------------- //
 // ========================================================================= //
 // Benchmark with an empty timing loop, followed by the lines each reporter
 // is expected to emit for it.
 void BM_basic(benchmark::State& state) {
  while (state.KeepRunning()) {}
 }
 BENCHMARK(BM_basic);
 // Console: name, real time in ns, CPU time in ns, iteration count.
 ADD_CASES(&ConsoleOutputTests, {
    {"^BM_basic[ ]+[0-9]{1,5} ns[ ]+[0-9]{1,5} ns[ ]+[0-9]+$"}
 });
 // JSON: the name entry, then each field on the immediately following line.
 ADD_CASES(&JSONOutputTests, {
    {"\"name\": \"BM_basic\",$"},
    {"\"iterations\": [0-9]+,$", MR_Next},
    {"\"real_time\": [0-9]{1,5},$", MR_Next},
    {"\"cpu_time\": [0-9]{1,5},$", MR_Next},
    {"\"time_unit\": \"ns\"$", MR_Next},
    {"}", MR_Next}
 });
 // CSV: quoted name, iteration count, two decimal times, the unit, and empty
 // trailing columns.
 ADD_CASES(&CSVOutputTests, {
    {"^\"BM_basic\",[0-9]+," + dec_re + "," + dec_re + ",ns,,,,,$"}
 });
 // ========================================================================= //
 // ------------------------ Testing Error Output --------------------------- //
 // ========================================================================= //
 // Benchmark that calls SkipWithError("message") before entering the timing
 // loop, followed by the error lines each reporter is expected to emit.
 void BM_error(benchmark::State& state) {
    state.SkipWithError("message");
    while(state.KeepRunning()) {}
 }
 BENCHMARK(BM_error);
 ADD_CASES(&ConsoleOutputTests, {
    {"^BM_error[ ]+ERROR OCCURRED: 'message'$"}
 });
 ADD_CASES(&JSONOutputTests, {
    {"\"name\": \"BM_error\",$"},
    {"\"error_occurred\": true,$", MR_Next},
    {"\"error_message\": \"message\",$", MR_Next}
 });
 // CSV: all measurement columns are empty; only the error columns are set.
 ADD_CASES(&CSVOutputTests, {
    {"^\"BM_error\",,,,,,,,true,\"message\"$"}
 });
 // ========================================================================= //
 // ----------------------- Testing Complexity Output ----------------------- //
 // ========================================================================= //
 // Benchmark with an empty timing loop that reports state.range_x() as its
 // complexity N; registered over Range(1, 1<<18) with Complexity(o1).
 void BM_Complexity_O1(benchmark::State& state) {
  while (state.KeepRunning()) {
  }
  state.SetComplexityN(state.range_x());
 }
 BENCHMARK(BM_Complexity_O1)->Range(1, 1<<18)->Complexity(benchmark::o1);
 // Regex for one complexity column: a decimal number, a space, and an integer
 // in parentheses.
 std::string bigOStr = "[0-9]+\\.[0-9]+ \\([0-9]+\\)";
 // Console: the _BigO summary line and the _RMS line (two percentages).
 ADD_CASES(&ConsoleOutputTests, {
   {join("^BM_Complexity_O1_BigO", bigOStr, bigOStr) + "[ ]*$"},
   {join("^BM_Complexity_O1_RMS", "[0-9]+ %", "[0-9]+ %") + "[ ]*$"}
 });
 // ========================================================================= //
 // --------------------------- TEST CASES END ------------------------------ //
 // ========================================================================= //
 int main(int argc, char* argv[]) {
  // Runs every registered benchmark through the console, JSON and CSV
  // reporters at once, then checks what each reporter wrote to its output and
  // error streams against the expected-line regexes registered with ADD_CASES.
  //
  // Add --color_print=false to argv since we don't want to match color codes.
  // The arguments are copied into a vector rather than a fixed 64-entry array
  // so that a long command line cannot overflow the buffer.
  char no_color_flag[] = "--color_print=false";
  std::vector<char*> new_argv(argv, argv + argc);
  new_argv.push_back(no_color_flag);
  ++argc;
  // Conventional null terminator after the last argument; not counted in argc.
  new_argv.push_back(nullptr);
  benchmark::Initialize(&argc, new_argv.data());
  benchmark::ConsoleReporter CR;
  benchmark::JSONReporter JR;
  benchmark::CSVReporter CSVR;
  // Bundles one reporter with the expectations for its two streams and the
  // string streams that capture everything the reporter writes.
  struct ReporterTest {
    const char* name;
    std::vector<TestCase>& output_cases;
    std::vector<TestCase>& error_cases;
    benchmark::BenchmarkReporter& reporter;
    std::stringstream out_stream;
    std::stringstream err_stream;
    ReporterTest(const char* n,
                 std::vector<TestCase>& out_tc,
                 std::vector<TestCase>& err_tc,
                 benchmark::BenchmarkReporter& br)
        : name(n), output_cases(out_tc), error_cases(err_tc), reporter(br) {
        // Redirect the reporter into the in-memory streams owned by this entry.
        reporter.SetOutputStream(&out_stream);
        reporter.SetErrorStream(&err_stream);
    }
  } TestCases[] = {
      {"ConsoleReporter", ConsoleOutputTests, ConsoleErrorTests, CR},
      {"JSONReporter", JSONOutputTests, JSONErrorTests, JR},
      {"CSVReporter", CSVOutputTests, CSVErrorTests, CSVR}
  };
  // Create the test reporter and run the benchmarks.
  std::cout << "Running benchmarks...\n";
  TestReporter test_rep({&CR, &JR, &CSVR});
  benchmark::RunSpecifiedBenchmarks(&test_rep);
  for (auto& rep_test : TestCases) {
      std::string msg = std::string("\nTesting ") + rep_test.name + " Output\n";
      std::string banner(msg.size() - 1, '-');
      std::cout << banner << msg << banner << "\n";
      // Echo the captured text so a failing run shows what was produced.
      std::cerr << rep_test.err_stream.str();
      std::cout << rep_test.out_stream.str();
      // Each check consumes lines from its stream, so the cases are matched
      // in order against the remaining output.
      for (const auto& TC : rep_test.error_cases)
        TC.Check(rep_test.err_stream);
      for (const auto& TC : rep_test.output_cases)
        TC.Check(rep_test.out_stream);
      std::cout << "\n";
  }
  return 0;
 }
--- a/utils/google-benchmark/test/skip_with_error_test.cc
+++ b/utils/google-benchmark/test/skip_with_error_test.cc
@@ -0,0 +1,161 @@
 #undef NDEBUG
 #include "benchmark/benchmark.h"
 #include "../src/check.h" // NOTE: check.h is for internal use only!
 #include <cassert>
 #include <vector>
 namespace {
 class TestReporter : public benchmark::ConsoleReporter {
 public:
  virtual bool ReportContext(const Context& context) {
    return ConsoleReporter::ReportContext(context);
  };
  virtual void ReportRuns(const std::vector<Run>& report) {
    all_runs_.insert(all_runs_.end(), begin(report), end(report));
    ConsoleReporter::ReportRuns(report);
  }
  TestReporter()  {}
  virtual ~TestReporter() {}
  mutable std::vector<Run> all_runs_;
 };
 // Expected outcome of one benchmark run: its full name, whether it reported
 // an error, and the error message.
 struct TestCase {
  typedef benchmark::BenchmarkReporter::Run Run;

  std::string name;
  bool error_occurred;
  std::string error_message;

  // CHECKs that `run` has the expected name, error flag and error message.
  // A run expected to succeed must also have a non-zero iteration count;
  // nothing is checked about the iteration count of a failed run.
  void CheckRun(Run const& run) const {
    CHECK(name == run.benchmark_name) << "expected " << name << " got " << run.benchmark_name;
    CHECK(error_occurred == run.error_occurred);
    CHECK(error_message == run.error_message);
    if (!error_occurred) {
      CHECK(run.iterations != 0);
    }
  }
 };
 std::vector<TestCase> ExpectedResults;
 int AddCases(const char* base_name, std::initializer_list<TestCase> const& v) {
  for (auto TC : v) {
    TC.name = base_name + TC.name;
    ExpectedResults.push_back(std::move(TC));
  }
  return 0;
 }
 // ADD_CASES(base_name, {cases...}) defines a uniquely named dummy int (the
 // name is built from __LINE__) whose initializer calls AddCases(), so the
 // cases are appended while the dummy variable is being initialized. CONCAT
 // goes through CONCAT2 so that __LINE__ is expanded before token pasting.
 #define CONCAT(x, y) CONCAT2(x, y)
 #define CONCAT2(x, y) x##y
 #define ADD_CASES(...) \
 int CONCAT(dummy, __LINE__) = AddCases(__VA_ARGS__)
 }  // end namespace
 // Calls SkipWithError() before the first KeepRunning(); the loop body asserts
 // that it is never entered. Expected result: one run flagged as an error.
 void BM_error_before_running(benchmark::State& state) {
  state.SkipWithError("error message");
  while (state.KeepRunning()) {
    assert(false);
  }
 }
 BENCHMARK(BM_error_before_running);
 ADD_CASES("BM_error_before_running",
          {{"", true, "error message"}});
 // Calls SkipWithError() from inside the timing loop. Only runs whose
 // range_x() is 1 raise the error, and only on threads whose index is at most
 // threads / 2; all other threads and runs just pause and resume timing.
 // The assert verifies that an erroring thread does not get another
 // iteration after it has reported the error.
 void BM_error_during_running(benchmark::State& state) {
  // A flag, so it is a bool (matching BM_error_while_paused) rather than int.
  bool first_iter = true;
  while (state.KeepRunning()) {
    if (state.range_x() == 1 && state.thread_index <= (state.threads / 2)) {
      assert(first_iter);
      first_iter = false;
      state.SkipWithError("error message");
    } else {
      state.PauseTiming();
      state.ResumeTiming();
    }
  }
 }
 BENCHMARK(BM_error_during_running)->Arg(1)->Arg(2)->ThreadRange(1, 8);
 ADD_CASES(
    "BM_error_during_running",
    {{"/1/threads:1", true, "error message"},
    {"/1/threads:2", true, "error message"},
    {"/1/threads:4", true, "error message"},
    {"/1/threads:8", true, "error message"},
    {"/2/threads:1", false, ""},
    {"/2/threads:2", false, ""},
    {"/2/threads:4", false, ""},
    {"/2/threads:8", false, ""}}
 );
 // Runs the timing loop to completion and only then calls SkipWithError(), on
 // threads whose index is at most threads / 2. Every thread count in the
 // registered range is expected to be reported as an error.
 void BM_error_after_running(benchmark::State& state) {
  while (state.KeepRunning()) {
    benchmark::DoNotOptimize(state.iterations());
  }
  if (state.thread_index <= (state.threads / 2))
    state.SkipWithError("error message");
 }
 BENCHMARK(BM_error_after_running)->ThreadRange(1, 8);
 ADD_CASES(
    "BM_error_after_running",
    {{"/threads:1", true, "error message"},
    {"/threads:2", true, "error message"},
    {"/threads:4", true, "error message"},
    {"/threads:8", true, "error message"}}
 );
 // Like BM_error_during_running, except that the erroring threads call
 // PauseTiming() first, so SkipWithError() is called while timing is paused.
 // Only runs whose range_x() is 1 raise the error, on threads whose index is
 // at most threads / 2.
 void BM_error_while_paused(benchmark::State& state) {
  bool first_iter = true;
  while (state.KeepRunning()) {
    if (state.range_x() == 1 && state.thread_index <= (state.threads / 2)) {
      // An erroring thread must not be given a second iteration.
      assert(first_iter);
      first_iter = false;
      state.PauseTiming();
      state.SkipWithError("error message");
    } else {
      state.PauseTiming();
      state.ResumeTiming();
    }
  }
 }
 BENCHMARK(BM_error_while_paused)->Arg(1)->Arg(2)->ThreadRange(1, 8);
 ADD_CASES(
    "BM_error_while_paused",
    {{"/1/threads:1", true, "error message"},
    {"/1/threads:2", true, "error message"},
    {"/1/threads:4", true, "error message"},
    {"/1/threads:8", true, "error message"},
    {"/2/threads:1", false, ""},
    {"/2/threads:2", false, ""},
    {"/2/threads:4", false, ""},
    {"/2/threads:8", false, ""}}
 );
 int main(int argc, char* argv[]) {
  benchmark::Initialize(&argc, argv);
  TestReporter test_reporter;
  benchmark::RunSpecifiedBenchmarks(&test_reporter);
  typedef benchmark::BenchmarkReporter::Run Run;
  auto EB = ExpectedResults.begin();
  for (Run const& run : test_reporter.all_runs_) {
    assert(EB != ExpectedResults.end());
    EB->CheckRun(run);
    ++EB;
  }
  assert(EB == ExpectedResults.end());
  return 0;
 }