Update in-tree Google Benchmark to current ToT.

I've put some work into the Google Benchmark library to make it easier to
benchmark libc++. These changes have already landed upstream in Google
Benchmark, and this patch applies them to the in-tree version.

The main improvement is the addition of a 'compare_bench.py' script, which
makes it very easy to compare benchmarks. For example, to compare the native
STL to libc++ you would run:

`$ compare_bench.py ./util_smartptr.native.out ./util_smartptr.libcxx.out`

And the output would look like:

RUNNING: ./util_smartptr.native.out
Benchmark                          Time           CPU Iterations
----------------------------------------------------------------
BM_SharedPtrCreateDestroy         62 ns         62 ns   10937500
BM_SharedPtrIncDecRef             31 ns         31 ns   23972603
BM_WeakPtrIncDecRef               28 ns         28 ns   23648649
RUNNING: ./util_smartptr.libcxx.out
Benchmark                          Time           CPU Iterations
----------------------------------------------------------------
BM_SharedPtrCreateDestroy         46 ns         46 ns   14957265
BM_SharedPtrIncDecRef             31 ns         31 ns   22435897
BM_WeakPtrIncDecRef               34 ns         34 ns   21084337
Comparing ./util_smartptr.native.out to ./util_smartptr.libcxx.out
Benchmark                          Time           CPU
-----------------------------------------------------
BM_SharedPtrCreateDestroy         -0.26         -0.26
BM_SharedPtrIncDecRef             +0.00         +0.00
BM_WeakPtrIncDecRef               +0.21         +0.21
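
For reference, the binaries being compared are ordinary Google Benchmark
programs. A minimal sketch of a benchmark in that style is shown below; it is
illustrative only, since the actual util_smartptr sources are not reproduced
in this summary.

```c++
// Illustrative sketch of a shared_ptr micro-benchmark similar to the ones
// compared above; the real util_smartptr benchmark is not shown here.
#include "benchmark/benchmark.h"
#include <memory>

static void BM_SharedPtrCreateDestroy(benchmark::State& st) {
  while (st.KeepRunning()) {
    auto sp = std::make_shared<int>(42);  // construct and destroy each iteration
    benchmark::DoNotOptimize(sp.get());   // keep the allocation from being elided
  }
}
BENCHMARK(BM_SharedPtrCreateDestroy);

BENCHMARK_MAIN()
```

Building such a file once against the native STL and once against libc++
produces the pair of `.native.out` / `.libcxx.out` binaries that
`compare_bench.py` consumes.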

git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@278147 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Eric Fiselier
Date: 2016-08-09 18:56:48 +00:00
parent 1fceb5e53d
commit 30b48cb1b3
33 changed files with 1103 additions and 291 deletions


@@ -87,7 +87,12 @@ macro(add_benchmark_test name source_file)
add_executable(${libcxx_target} EXCLUDE_FROM_ALL ${source_file})
add_dependencies(${libcxx_target} cxx google-benchmark-libcxx)
add_dependencies(libcxx-benchmarks ${libcxx_target})
target_link_libraries(${libcxx_target} cxx -lbenchmark)
if (LIBCXX_ENABLE_SHARED)
target_link_libraries(${libcxx_target} cxx_shared)
else()
target_link_libraries(${libcxx_target} cxx_static)
endif()
target_link_libraries(${libcxx_target} -lbenchmark)
set_target_properties(${libcxx_target}
PROPERTIES
OUTPUT_NAME "${name}.libcxx.out"


@@ -10,7 +10,7 @@ namespace ContainerBenchmarks {
template <class Container, class GenInputs>
void BM_ConstructIterIter(benchmark::State& st, Container, GenInputs gen) {
auto in = gen(st.range_x());
auto in = gen(st.range(0));
const auto end = in.end();
benchmark::DoNotOptimize(&in);
while (st.KeepRunning()) {
@@ -21,7 +21,7 @@ void BM_ConstructIterIter(benchmark::State& st, Container, GenInputs gen) {
template <class Container, class GenInputs>
void BM_InsertValue(benchmark::State& st, Container c, GenInputs gen) {
auto in = gen(st.range_x());
auto in = gen(st.range(0));
const auto end = in.end();
while (st.KeepRunning()) {
c.clear();
@@ -34,7 +34,7 @@ void BM_InsertValue(benchmark::State& st, Container c, GenInputs gen) {
template <class Container, class GenInputs>
void BM_InsertValueRehash(benchmark::State& st, Container c, GenInputs gen) {
auto in = gen(st.range_x());
auto in = gen(st.range(0));
const auto end = in.end();
while (st.KeepRunning()) {
c.clear();
@@ -49,7 +49,7 @@ void BM_InsertValueRehash(benchmark::State& st, Container c, GenInputs gen) {
template <class Container, class GenInputs>
void BM_InsertDuplicate(benchmark::State& st, Container c, GenInputs gen) {
auto in = gen(st.range_x());
auto in = gen(st.range(0));
const auto end = in.end();
c.insert(in.begin(), in.end());
benchmark::DoNotOptimize(&c);
@@ -65,7 +65,7 @@ void BM_InsertDuplicate(benchmark::State& st, Container c, GenInputs gen) {
template <class Container, class GenInputs>
void BM_EmplaceDuplicate(benchmark::State& st, Container c, GenInputs gen) {
auto in = gen(st.range_x());
auto in = gen(st.range(0));
const auto end = in.end();
c.insert(in.begin(), in.end());
benchmark::DoNotOptimize(&c);
@@ -80,7 +80,7 @@ void BM_EmplaceDuplicate(benchmark::State& st, Container c, GenInputs gen) {
template <class Container, class GenInputs>
static void BM_Find(benchmark::State& st, Container c, GenInputs gen) {
auto in = gen(st.range_x());
auto in = gen(st.range(0));
c.insert(in.begin(), in.end());
benchmark::DoNotOptimize(&(*c.begin()));
const auto end = in.data() + in.size();
@@ -95,7 +95,7 @@ static void BM_Find(benchmark::State& st, Container c, GenInputs gen) {
template <class Container, class GenInputs>
static void BM_FindRehash(benchmark::State& st, Container c, GenInputs gen) {
c.rehash(8);
auto in = gen(st.range_x());
auto in = gen(st.range(0));
c.insert(in.begin(), in.end());
benchmark::DoNotOptimize(&(*c.begin()));
const auto end = in.data() + in.size();


@@ -10,7 +10,7 @@ constexpr std::size_t TestNumInputs = 1024;
template <class GenInputs>
void BM_Sort(benchmark::State& st, GenInputs gen) {
using ValueType = typename decltype(gen(0))::value_type;
const auto in = gen(st.range_x());
const auto in = gen(st.range(0));
std::vector<ValueType> inputs[5];
auto reset_inputs = [&]() {
for (auto& C : inputs) {


@@ -109,7 +109,7 @@ struct UInt64Hash2 {
template <class HashFn, class GenInputs>
void BM_Hash(benchmark::State& st, HashFn fn, GenInputs gen) {
auto in = gen(st.range_x());
auto in = gen(st.range(0));
const auto end = in.data() + in.size();
std::size_t last_hash = 0;
benchmark::DoNotOptimize(&last_hash);


@@ -13,6 +13,7 @@ Arne Beer <arne@twobeer.de>
Christopher Seymour <chris.j.seymour@hotmail.com>
David Coeurjolly <david.coeurjolly@liris.cnrs.fr>
Dominic Hamon <dma@stripysock.com>
Eric Fiselier <eric@efcs.ca>
Eugene Zhuk <eugene.zhuk@gmail.com>
Evgeny Safronov <division494@gmail.com>
Felix Homann <linuxaudio@showlabor.de>


@@ -40,13 +40,13 @@ measuring the speed of `memcpy()` calls of different lengths:
```c++
static void BM_memcpy(benchmark::State& state) {
char* src = new char[state.range_x()];
char* dst = new char[state.range_x()];
memset(src, 'x', state.range_x());
char* src = new char[state.range(0)];
char* dst = new char[state.range(0)];
memset(src, 'x', state.range(0));
while (state.KeepRunning())
memcpy(dst, src, state.range_x());
memcpy(dst, src, state.range(0));
state.SetBytesProcessed(int64_t(state.iterations()) *
int64_t(state.range_x()));
int64_t(state.range(0)));
delete[] src;
delete[] dst;
}
@@ -70,7 +70,7 @@ BENCHMARK(BM_memcpy)->RangeMultiplier(2)->Range(8, 8<<10);
```
Now arguments generated are [ 8, 16, 32, 64, 128, 256, 512, 1024, 2k, 4k, 8k ].
You might have a benchmark that depends on two inputs. For example, the
You might have a benchmark that depends on two or more inputs. For example, the
following code defines a family of benchmarks for measuring the speed of set
insertion.
@@ -78,21 +78,21 @@ insertion.
static void BM_SetInsert(benchmark::State& state) {
while (state.KeepRunning()) {
state.PauseTiming();
std::set<int> data = ConstructRandomSet(state.range_x());
std::set<int> data = ConstructRandomSet(state.range(0));
state.ResumeTiming();
for (int j = 0; j < state.range_y(); ++j)
for (int j = 0; j < state.range(1); ++j)
data.insert(RandomNumber());
}
}
BENCHMARK(BM_SetInsert)
->ArgPair(1<<10, 1)
->ArgPair(1<<10, 8)
->ArgPair(1<<10, 64)
->ArgPair(1<<10, 512)
->ArgPair(8<<10, 1)
->ArgPair(8<<10, 8)
->ArgPair(8<<10, 64)
->ArgPair(8<<10, 512);
->Args({1<<10, 1})
->Args({1<<10, 8})
->Args({1<<10, 64})
->Args({1<<10, 512})
->Args({8<<10, 1})
->Args({8<<10, 8})
->Args({8<<10, 64})
->Args({8<<10, 512});
```
The preceding code is quite repetitive, and can be replaced with the following
@@ -101,7 +101,7 @@ product of the two specified ranges and will generate a benchmark for each such
pair.
```c++
BENCHMARK(BM_SetInsert)->RangePair(1<<10, 8<<10, 1, 512);
BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {1, 512}});
```
For more complex patterns of inputs, passing a custom function to `Apply` allows
@@ -113,7 +113,7 @@ and a sparse range on the second.
static void CustomArguments(benchmark::internal::Benchmark* b) {
for (int i = 0; i <= 10; ++i)
for (int j = 32; j <= 1024*1024; j *= 8)
b->ArgPair(i, j);
b->Args({i, j});
}
BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
```
@@ -125,12 +125,12 @@ running time and the normalized root-mean square error of string comparison.
```c++
static void BM_StringCompare(benchmark::State& state) {
std::string s1(state.range_x(), '-');
std::string s2(state.range_x(), '-');
std::string s1(state.range(0), '-');
std::string s2(state.range(0), '-');
while (state.KeepRunning()) {
benchmark::DoNotOptimize(s1.compare(s2));
}
state.SetComplexityN(state.range_x());
state.SetComplexityN(state.range(0));
}
BENCHMARK(BM_StringCompare)
->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(benchmark::oN);
@@ -162,14 +162,14 @@ template <class Q> int BM_Sequential(benchmark::State& state) {
Q q;
typename Q::value_type v;
while (state.KeepRunning()) {
for (int i = state.range_x(); i--; )
for (int i = state.range(0); i--; )
q.push(v);
for (int e = state.range_x(); e--; )
for (int e = state.range(0); e--; )
q.Wait(&v);
}
// actually messages, not bytes:
state.SetBytesProcessed(
static_cast<int64_t>(state.iterations())*state.range_x());
static_cast<int64_t>(state.iterations())*state.range(0));
}
BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
```
@@ -206,6 +206,34 @@ BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
Note that elements of `...args` may refer to global variables. Users should
avoid modifying global state inside of a benchmark.
## Using RegisterBenchmark(name, fn, args...)
The `RegisterBenchmark(name, func, args...)` function provides an alternative
way to create and register benchmarks.
`RegisterBenchmark(name, func, args...)` creates, registers, and returns a
pointer to a new benchmark with the specified `name` that invokes
`func(st, args...)` where `st` is a `benchmark::State` object.
Unlike the `BENCHMARK` registration macros, which can only be used at global
scope, `RegisterBenchmark` can be called anywhere. This allows benchmark
tests to be registered programmatically.
Additionally, `RegisterBenchmark` allows any callable object to be registered
as a benchmark, including capturing lambdas and function objects.
For example:
```c++
auto BM_test = [](benchmark::State& st, auto Inputs) { /* ... */ };
int main(int argc, char** argv) {
for (auto& test_input : { /* ... */ })
benchmark::RegisterBenchmark(test_input.name(), BM_test, test_input);
benchmark::Initialize(&argc, argv);
benchmark::RunSpecifiedBenchmarks();
}
```
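A slightly more concrete sketch of the same pattern follows; the `TestInput`
type, the input list, and the workload are hypothetical and only illustrate
how the pieces fit together.
```c++
// Hedged sketch: registering benchmarks programmatically for a set of inputs.
// TestInput and the inputs below are hypothetical, not part of the library.
#include "benchmark/benchmark.h"
#include <string>
#include <vector>

struct TestInput {
  std::string name;
  int size;
};

int main(int argc, char** argv) {
  auto BM_test = [](benchmark::State& st, TestInput input) {
    while (st.KeepRunning()) {
      std::string s(input.size, 'x');      // hypothetical workload
      benchmark::DoNotOptimize(s.data());
    }
  };
  std::vector<TestInput> inputs = {{"small", 8}, {"large", 8 << 10}};
  for (auto const& in : inputs)
    benchmark::RegisterBenchmark(in.name.c_str(), BM_test, in);
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
}
```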
### Multithreaded benchmarks
In a multithreaded test (benchmark invoked by multiple threads simultaneously),
it is guaranteed that none of the threads will start until all have called
@@ -256,7 +284,7 @@ can be reported back with `SetIterationTime`.
```c++
static void BM_ManualTiming(benchmark::State& state) {
int microseconds = state.range_x();
int microseconds = state.range(0);
std::chrono::duration<double, std::micro> sleep_duration {
static_cast<double>(microseconds)
};
@@ -427,10 +455,10 @@ static void BM_test(benchmark::State& state) {
## Output Formats
The library supports multiple output formats. Use the
`--benchmark_format=<tabular|json|csv>` flag to set the format type. `tabular` is
the default format.
`--benchmark_format=<console|json|csv>` flag to set the format type. `console`
is the default format.
The Tabular format is intended to be a human readable format. By default
The Console format is intended to be a human readable format. By default
the format generates color output. Context is output on stderr and the
tabular data on stdout. Example tabular output looks like:
```
@@ -493,6 +521,12 @@ name,iterations,real_time,cpu_time,bytes_per_second,items_per_second,label
"BM_SetInsert/1024/10",106365,17238.4,8421.53,4.74973e+06,1.18743e+06,
```
## Output Files
The library supports writing the output of the benchmark to a file specified
by `--benchmark_out=<filename>`. The format of the output can be specified
using `--benchmark_out_format={json|console|csv}`. Specifying
`--benchmark_out` does not suppress the console output.
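The file output can also be driven with a custom reporter via the new
`RunSpecifiedBenchmarks(console_reporter, file_reporter)` overload added in
this update. A minimal sketch, with illustrative reporter choices:
```c++
// Hedged sketch: drive the new two-reporter overload directly. Passing a
// custom file reporter requires --benchmark_out=<file> on the command line;
// otherwise the run exits with an error.
#include "benchmark/benchmark.h"

int main(int argc, char** argv) {
  benchmark::Initialize(&argc, argv);
  benchmark::ConsoleReporter console_reporter;  // human-readable output
  benchmark::JSONReporter file_reporter;        // written to the --benchmark_out file
  benchmark::RunSpecifiedBenchmarks(&console_reporter, &file_reporter);
}
```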
## Debug vs Release
By default, benchmark builds as a debug library. You will see a warning in the output when this is the case. To build it as a release library instead, use:
@@ -507,4 +541,22 @@ cmake -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_ENABLE_LTO=true
```
## Linking against the library
When using gcc, it is necessary to link against pthread to avoid runtime exceptions. This is due to how gcc implements std::thread. See [issue #67](https://github.com/google/benchmark/issues/67) for more details.
When using gcc, it is necessary to link against pthread to avoid runtime exceptions.
This is due to how gcc implements std::thread.
See [issue #67](https://github.com/google/benchmark/issues/67) for more details.
## Compiler Support
Google Benchmark uses C++11 when building the library. As such we require
a modern C++ toolchain, both compiler and standard library.
The following minimum versions are strongly recommended to build the library:
* GCC 4.8
* Clang 3.4
* Visual Studio 2013
Anything older *may* work.
Note: Using the library and its headers in C++03 is supported. C++11 is only
required to build the library.


@@ -38,12 +38,12 @@ int main(int argc, char** argv) {
// of memcpy() calls of different lengths:
static void BM_memcpy(benchmark::State& state) {
char* src = new char[state.range_x()]; char* dst = new char[state.range_x()];
memset(src, 'x', state.range_x());
char* src = new char[state.range(0)]; char* dst = new char[state.range(0)];
memset(src, 'x', state.range(0));
while (state.KeepRunning())
memcpy(dst, src, state.range_x());
memcpy(dst, src, state.range(0));
state.SetBytesProcessed(int64_t(state.iterations()) *
int64_t(state.range_x()));
int64_t(state.range(0)));
delete[] src; delete[] dst;
}
BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
@@ -60,27 +60,27 @@ BENCHMARK(BM_memcpy)->Range(8, 8<<10);
static void BM_SetInsert(benchmark::State& state) {
while (state.KeepRunning()) {
state.PauseTiming();
set<int> data = ConstructRandomSet(state.range_x());
set<int> data = ConstructRandomSet(state.range(0));
state.ResumeTiming();
for (int j = 0; j < state.range_y(); ++j)
for (int j = 0; j < state.range(1); ++j)
data.insert(RandomNumber());
}
}
BENCHMARK(BM_SetInsert)
->ArgPair(1<<10, 1)
->ArgPair(1<<10, 8)
->ArgPair(1<<10, 64)
->ArgPair(1<<10, 512)
->ArgPair(8<<10, 1)
->ArgPair(8<<10, 8)
->ArgPair(8<<10, 64)
->ArgPair(8<<10, 512);
->Args({1<<10, 1})
->Args({1<<10, 8})
->Args({1<<10, 64})
->Args({1<<10, 512})
->Args({8<<10, 1})
->Args({8<<10, 8})
->Args({8<<10, 64})
->Args({8<<10, 512});
// The preceding code is quite repetitive, and can be replaced with
// the following short-hand. The following macro will pick a few
// appropriate arguments in the product of the two specified ranges
// and will generate a microbenchmark for each such pair.
BENCHMARK(BM_SetInsert)->RangePair(1<<10, 8<<10, 1, 512);
BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {1, 512}});
// For more complex patterns of inputs, passing a custom function
// to Apply allows programmatic specification of an
@@ -90,7 +90,7 @@ BENCHMARK(BM_SetInsert)->RangePair(1<<10, 8<<10, 1, 512);
static void CustomArguments(benchmark::internal::Benchmark* b) {
for (int i = 0; i <= 10; ++i)
for (int j = 32; j <= 1024*1024; j *= 8)
b->ArgPair(i, j);
b->Args({i, j});
}
BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
@@ -101,14 +101,14 @@ template <class Q> int BM_Sequential(benchmark::State& state) {
Q q;
typename Q::value_type v;
while (state.KeepRunning()) {
for (int i = state.range_x(); i--; )
for (int i = state.range(0); i--; )
q.push(v);
for (int e = state.range_x(); e--; )
for (int e = state.range(0); e--; )
q.Wait(&v);
}
// actually messages, not bytes:
state.SetBytesProcessed(
static_cast<int64_t>(state.iterations())*state.range_x());
static_cast<int64_t>(state.iterations())*state.range(0));
}
BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
@@ -153,8 +153,15 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
#include <stddef.h>
#include <stdint.h>
#include <vector>
#include "macros.h"
#if defined(BENCHMARK_HAS_CXX11)
#include <type_traits>
#include <utility>
#endif
namespace benchmark {
class BenchmarkReporter;
@@ -165,11 +172,16 @@ void Initialize(int* argc, char** argv);
// of each matching benchmark. Otherwise run each matching benchmark and
// report the results.
//
// The second overload reports the results using the specified 'reporter'.
// The second and third overloads use the specified 'console_reporter' and
// 'file_reporter' respectively. 'file_reporter' will write to the file
// specified by '--benchmark_out'. If '--benchmark_out' is not given the
// 'file_reporter' is ignored.
//
// RETURNS: The number of matching benchmarks.
size_t RunSpecifiedBenchmarks();
size_t RunSpecifiedBenchmarks(BenchmarkReporter* reporter);
size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter);
size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter,
BenchmarkReporter* file_reporter);
// If this routine is called, peak memory allocation past this point in the
@@ -258,7 +270,7 @@ typedef double(BigOFunc)(int);
// benchmark to use.
class State {
public:
State(size_t max_iters, bool has_x, int x, bool has_y, int y,
State(size_t max_iters, const std::vector<int>& ranges,
int thread_i, int n_threads);
// Returns true if the benchmark should continue through another iteration.
@@ -367,7 +379,7 @@ public:
}
BENCHMARK_ALWAYS_INLINE
size_t complexity_length_n() {
int complexity_length_n() {
return complexity_n_;
}
@@ -413,17 +425,9 @@ public:
// Range arguments for this run. CHECKs if the argument has been set.
BENCHMARK_ALWAYS_INLINE
int range_x() const {
assert(has_range_x_);
((void)has_range_x_); // Prevent unused warning.
return range_x_;
}
BENCHMARK_ALWAYS_INLINE
int range_y() const {
assert(has_range_y_);
((void)has_range_y_); // Prevent unused warning.
return range_y_;
int range(std::size_t pos) const {
assert(range_.size() > pos);
return range_[pos];
}
BENCHMARK_ALWAYS_INLINE
@@ -434,11 +438,7 @@ private:
bool finished_;
size_t total_iterations_;
bool has_range_x_;
int range_x_;
bool has_range_y_;
int range_y_;
std::vector<int> range_;
size_t bytes_processed_;
size_t items_processed_;
@@ -489,24 +489,22 @@ public:
// REQUIRES: The function passed to the constructor must accept an arg1.
Benchmark* Range(int start, int limit);
// Run this benchmark once for every value in the range [start..limit]
// Run this benchmark once for all values in the range [start..limit] with specific step
// REQUIRES: The function passed to the constructor must accept an arg1.
Benchmark* DenseRange(int start, int limit);
Benchmark* DenseRange(int start, int limit, int step = 1);
// Run this benchmark once with "x,y" as the extra arguments passed
// Run this benchmark once with "args" as the extra arguments passed
// to the function.
// REQUIRES: The function passed to the constructor must accept arg1,arg2.
Benchmark* ArgPair(int x, int y);
// REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
Benchmark* Args(const std::vector<int>& args);
// Pick a set of values A from the range [lo1..hi1] and a set
// of values B from the range [lo2..hi2]. Run the benchmark for
// every pair of values in the cartesian product of A and B
// (i.e., for all combinations of the values in A and B).
// REQUIRES: The function passed to the constructor must accept arg1,arg2.
Benchmark* RangePair(int lo1, int hi1, int lo2, int hi2);
// Run this benchmark once for a number of values picked from the
// ranges [start..limit]. (starts and limits are always picked.)
// REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
Benchmark* Ranges(const std::vector<std::pair<int, int> >& ranges);
// Pass this benchmark object to *func, which can customize
// the benchmark by calling various methods like Arg, ArgPair,
// the benchmark by calling various methods like Arg, Args,
// Threads, etc.
Benchmark* Apply(void (*func)(Benchmark* benchmark));
@@ -587,6 +585,20 @@ private:
Benchmark& operator=(Benchmark const&);
};
} // namespace internal
// Create and register a benchmark with the specified 'name' that invokes
// the specified functor 'fn'.
//
// RETURNS: A pointer to the registered benchmark.
internal::Benchmark* RegisterBenchmark(const char* name, internal::Function* fn);
#if defined(BENCHMARK_HAS_CXX11)
template <class Lambda>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn);
#endif
namespace internal {
// The class used to hold all Benchmarks created from static function.
// (i.e. those created using the BENCHMARK(...) macros).
class FunctionBenchmark : public Benchmark {
@@ -600,8 +612,57 @@ private:
Function* func_;
};
#ifdef BENCHMARK_HAS_CXX11
template <class Lambda>
class LambdaBenchmark : public Benchmark {
public:
virtual void Run(State& st) { lambda_(st); }
private:
template <class OLambda>
LambdaBenchmark(const char* name, OLambda&& lam)
: Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}
LambdaBenchmark(LambdaBenchmark const&) = delete;
private:
template <class Lam>
friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&);
Lambda lambda_;
};
#endif
} // end namespace internal
inline internal::Benchmark*
RegisterBenchmark(const char* name, internal::Function* fn) {
return internal::RegisterBenchmarkInternal(
::new internal::FunctionBenchmark(name, fn));
}
#ifdef BENCHMARK_HAS_CXX11
template <class Lambda>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) {
using BenchType = internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
return internal::RegisterBenchmarkInternal(
::new BenchType(name, std::forward<Lambda>(fn)));
}
#endif
#if defined(BENCHMARK_HAS_CXX11) && \
(!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409)
template <class Lambda, class ...Args>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn,
Args&&... args) {
return benchmark::RegisterBenchmark(name,
[=](benchmark::State& st) { fn(st, args...); });
}
#else
#define BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
#endif
// The base class for all fixture tests.
class Fixture: public internal::Benchmark {
public:
@@ -652,11 +713,11 @@ protected:
// Old-style macros
#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->ArgPair((a1), (a2))
#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->Args({(a1), (a2)})
#define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t))
#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
BENCHMARK(n)->RangePair((l1), (h1), (l2), (h2))
BENCHMARK(n)->RangePair({{(l1), (h1)}, {(l2), (h2)}})
#if __cplusplus >= 201103L


@@ -14,7 +14,11 @@
#ifndef BENCHMARK_MACROS_H_
#define BENCHMARK_MACROS_H_
#if __cplusplus < 201103L
#if __cplusplus >= 201103L
#define BENCHMARK_HAS_CXX11
#endif
#ifndef BENCHMARK_HAS_CXX11
# define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
TypeName(const TypeName&); \
TypeName& operator=(const TypeName&)
@@ -53,4 +57,8 @@
# define BENCHMARK_BUILTIN_EXPECT(x, y) x
#endif
#if defined(__GNUC__) && !defined(__clang__)
#define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
#endif
#endif // BENCHMARK_MACROS_H_


@@ -157,13 +157,22 @@ class BenchmarkReporter {
// default reporter used by RunSpecifiedBenchmarks().
class ConsoleReporter : public BenchmarkReporter {
public:
enum OutputOptions {
OO_None,
OO_Color
};
explicit ConsoleReporter(OutputOptions color_output = OO_Color)
: color_output_(color_output == OO_Color) {}
virtual bool ReportContext(const Context& context);
virtual void ReportRuns(const std::vector<Run>& reports);
protected:
virtual void PrintRunData(const Run& report);
size_t name_field_width_;
private:
bool color_output_;
};
class JSONReporter : public BenchmarkReporter {


@@ -28,6 +28,7 @@
#include <atomic>
#include <condition_variable>
#include <iostream>
#include <fstream>
#include <memory>
#include <thread>
@@ -69,6 +70,12 @@ DEFINE_string(benchmark_format, "console",
"The format to use for console output. Valid values are "
"'console', 'json', or 'csv'.");
DEFINE_string(benchmark_out_format, "json",
"The format to use for file output. Valid values are "
"'console', 'json', or 'csv'.");
DEFINE_string(benchmark_out, "", "The file to write additional output to");
DEFINE_bool(color_print, true, "Enables colorized logging.");
DEFINE_int32(v, 0, "The level of verbose logging to output");
@@ -308,10 +315,7 @@ namespace internal {
struct Benchmark::Instance {
std::string name;
Benchmark* benchmark;
bool has_arg1;
int arg1;
bool has_arg2;
int arg2;
std::vector<int> arg;
TimeUnit time_unit;
int range_multiplier;
bool use_real_time;
@@ -354,9 +358,9 @@ public:
void Arg(int x);
void Unit(TimeUnit unit);
void Range(int start, int limit);
void DenseRange(int start, int limit);
void ArgPair(int start, int limit);
void RangePair(int lo1, int hi1, int lo2, int hi2);
void DenseRange(int start, int limit, int step = 1);
void Args(const std::vector<int>& args);
void Ranges(const std::vector<std::pair<int, int>>& ranges);
void RangeMultiplier(int multiplier);
void MinTime(double n);
void Repetitions(int n);
@@ -371,12 +375,13 @@ public:
static void AddRange(std::vector<int>* dst, int lo, int hi, int mult);
int ArgsCnt() const { return args_.empty() ? -1 : static_cast<int>(args_.front().size()); }
private:
friend class BenchmarkFamilies;
std::string name_;
int arg_count_;
std::vector< std::pair<int, int> > args_; // Args for all benchmark runs
std::vector< std::vector<int> > args_; // Args for all benchmark runs
TimeUnit time_unit_;
int range_multiplier_;
double min_time_;
@@ -424,10 +429,10 @@ bool BenchmarkFamilies::FindBenchmarks(
if (!bench_family) continue;
BenchmarkImp* family = bench_family->imp_;
if (family->arg_count_ == -1) {
family->arg_count_ = 0;
family->args_.emplace_back(-1, -1);
if (family->ArgsCnt() == -1) {
family->Args({});
}
for (auto const& args : family->args_) {
const std::vector<int>* thread_counts =
(family->thread_counts_.empty()
@@ -438,10 +443,7 @@ bool BenchmarkFamilies::FindBenchmarks(
Benchmark::Instance instance;
instance.name = family->name_;
instance.benchmark = bench_family.get();
instance.has_arg1 = family->arg_count_ >= 1;
instance.arg1 = args.first;
instance.has_arg2 = family->arg_count_ == 2;
instance.arg2 = args.second;
instance.arg = args;
instance.time_unit = family->time_unit_;
instance.range_multiplier = family->range_multiplier_;
instance.min_time = family->min_time_;
@@ -454,12 +456,10 @@ bool BenchmarkFamilies::FindBenchmarks(
instance.multithreaded = !(family->thread_counts_.empty());
// Add arguments to instance name
if (family->arg_count_ >= 1) {
AppendHumanReadable(instance.arg1, &instance.name);
}
if (family->arg_count_ >= 2) {
AppendHumanReadable(instance.arg2, &instance.name);
for (auto const& arg : args) {
AppendHumanReadable(arg, &instance.name);
}
if (!IsZero(family->min_time_)) {
instance.name += StringPrintF("/min_time:%0.3f", family->min_time_);
}
@@ -488,7 +488,7 @@ bool BenchmarkFamilies::FindBenchmarks(
}
BenchmarkImp::BenchmarkImp(const char* name)
: name_(name), arg_count_(-1), time_unit_(kNanosecond),
: name_(name), time_unit_(kNanosecond),
range_multiplier_(kRangeMultiplier), min_time_(0.0), repetitions_(0),
use_real_time_(false), use_manual_time_(false),
complexity_(oNone) {
@@ -498,9 +498,8 @@ BenchmarkImp::~BenchmarkImp() {
}
void BenchmarkImp::Arg(int x) {
CHECK(arg_count_ == -1 || arg_count_ == 1);
arg_count_ = 1;
args_.emplace_back(x, -1);
CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
args_.push_back({x});
}
void BenchmarkImp::Unit(TimeUnit unit) {
@@ -508,42 +507,54 @@ void BenchmarkImp::Unit(TimeUnit unit) {
}
void BenchmarkImp::Range(int start, int limit) {
CHECK(arg_count_ == -1 || arg_count_ == 1);
arg_count_ = 1;
CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
std::vector<int> arglist;
AddRange(&arglist, start, limit, range_multiplier_);
for (int i : arglist) {
args_.emplace_back(i, -1);
args_.push_back({i});
}
}
void BenchmarkImp::DenseRange(int start, int limit) {
CHECK(arg_count_ == -1 || arg_count_ == 1);
arg_count_ = 1;
void BenchmarkImp::DenseRange(int start, int limit, int step) {
CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
CHECK_GE(start, 0);
CHECK_LE(start, limit);
for (int arg = start; arg <= limit; arg++) {
args_.emplace_back(arg, -1);
for (int arg = start; arg <= limit; arg+= step) {
args_.push_back({arg});
}
}
void BenchmarkImp::ArgPair(int x, int y) {
CHECK(arg_count_ == -1 || arg_count_ == 2);
arg_count_ = 2;
args_.emplace_back(x, y);
void BenchmarkImp::Args(const std::vector<int>& args)
{
args_.push_back(args);
}
void BenchmarkImp::RangePair(int lo1, int hi1, int lo2, int hi2) {
CHECK(arg_count_ == -1 || arg_count_ == 2);
arg_count_ = 2;
std::vector<int> arglist1, arglist2;
AddRange(&arglist1, lo1, hi1, range_multiplier_);
AddRange(&arglist2, lo2, hi2, range_multiplier_);
void BenchmarkImp::Ranges(const std::vector<std::pair<int, int>>& ranges) {
std::vector<std::vector<int>> arglists(ranges.size());
int total = 1;
for (std::size_t i = 0; i < ranges.size(); i++) {
AddRange(&arglists[i], ranges[i].first, ranges[i].second, range_multiplier_);
total *= arglists[i].size();
}
for (int i : arglist1) {
for (int j : arglist2) {
args_.emplace_back(i, j);
std::vector<std::size_t> ctr(total, 0);
for (int i = 0; i < total; i++) {
std::vector<int> tmp;
for (std::size_t j = 0; j < arglists.size(); j++) {
tmp.push_back(arglists[j][ctr[j]]);
}
args_.push_back(tmp);
for (std::size_t j = 0; j < arglists.size(); j++) {
if (ctr[j] + 1 < arglists[j].size()) {
++ctr[j];
break;
}
ctr[j] = 0;
}
}
}
@@ -641,6 +652,7 @@ Benchmark::Benchmark(Benchmark const& other)
}
Benchmark* Benchmark::Arg(int x) {
CHECK(imp_->ArgsCnt() == -1 || imp_->ArgsCnt() == 1);
imp_->Arg(x);
return this;
}
@@ -651,22 +663,27 @@ Benchmark* Benchmark::Unit(TimeUnit unit) {
}
Benchmark* Benchmark::Range(int start, int limit) {
CHECK(imp_->ArgsCnt() == -1 || imp_->ArgsCnt() == 1);
imp_->Range(start, limit);
return this;
}
Benchmark* Benchmark::DenseRange(int start, int limit) {
imp_->DenseRange(start, limit);
Benchmark* Benchmark::Ranges(const std::vector<std::pair<int, int>>& ranges)
{
CHECK(imp_->ArgsCnt() == -1 || imp_->ArgsCnt() == static_cast<int>(ranges.size()));
imp_->Ranges(ranges);
return this;
}
Benchmark* Benchmark::ArgPair(int x, int y) {
imp_->ArgPair(x, y);
Benchmark* Benchmark::DenseRange(int start, int limit, int step) {
CHECK(imp_->ArgsCnt() == -1 || imp_->ArgsCnt() == 1);
imp_->DenseRange(start, limit, step);
return this;
}
Benchmark* Benchmark::RangePair(int lo1, int hi1, int lo2, int hi2) {
imp_->RangePair(lo1, hi1, lo2, hi2);
Benchmark* Benchmark::Args(const std::vector<int>& args) {
CHECK(imp_->ArgsCnt() == -1 || imp_->ArgsCnt() == static_cast<int>(args.size()));
imp_->Args(args);
return this;
}
@@ -744,7 +761,7 @@ namespace {
void RunInThread(const benchmark::internal::Benchmark::Instance* b,
size_t iters, int thread_id,
ThreadStats* total) EXCLUDES(GetBenchmarkLock()) {
State st(iters, b->has_arg1, b->arg1, b->has_arg2, b->arg2, thread_id, b->threads);
State st(iters, b->arg, thread_id, b->threads);
b->benchmark->Run(st);
CHECK(st.iterations() == st.max_iterations) <<
"Benchmark returned before State::KeepRunning() returned false!";
@@ -758,14 +775,13 @@ void RunInThread(const benchmark::internal::Benchmark::Instance* b,
timer_manager->Finalize();
}
void RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
BenchmarkReporter* br,
std::vector<BenchmarkReporter::Run>& complexity_reports)
std::vector<BenchmarkReporter::Run>
RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
std::vector<BenchmarkReporter::Run>* complexity_reports)
EXCLUDES(GetBenchmarkLock()) {
std::vector<BenchmarkReporter::Run> reports; // return value
size_t iters = 1;
std::vector<BenchmarkReporter::Run> reports;
std::vector<std::thread> pool;
if (b.multithreaded)
pool.resize(b.threads);
@@ -872,7 +888,7 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
report.complexity = b.complexity;
report.complexity_lambda = b.complexity_lambda;
if(report.complexity != oNone)
complexity_reports.push_back(report);
complexity_reports->push_back(report);
}
reports.push_back(report);
@@ -903,27 +919,26 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
additional_run_stats.end());
if((b.complexity != oNone) && b.last_benchmark_instance) {
additional_run_stats = ComputeBigO(complexity_reports);
additional_run_stats = ComputeBigO(*complexity_reports);
reports.insert(reports.end(), additional_run_stats.begin(),
additional_run_stats.end());
complexity_reports.clear();
complexity_reports->clear();
}
br->ReportRuns(reports);
if (b.multithreaded) {
for (std::thread& thread : pool)
thread.join();
}
return reports;
}
} // namespace
State::State(size_t max_iters, bool has_x, int x, bool has_y, int y,
State::State(size_t max_iters, const std::vector<int>& ranges,
int thread_i, int n_threads)
: started_(false), finished_(false), total_iterations_(0),
has_range_x_(has_x), range_x_(x),
has_range_y_(has_y), range_y_(y),
range_(ranges),
bytes_processed_(0), items_processed_(0),
complexity_n_(0),
error_occurred_(false),
@@ -975,8 +990,10 @@ namespace internal {
namespace {
void RunMatchingBenchmarks(const std::vector<Benchmark::Instance>& benchmarks,
BenchmarkReporter* reporter) {
CHECK(reporter != nullptr);
BenchmarkReporter* console_reporter,
BenchmarkReporter* file_reporter) {
// Note the file_reporter can be null.
CHECK(console_reporter != nullptr);
// Determine the width of the name field using a minimum width of 10.
bool has_repetitions = FLAGS_benchmark_repetitions > 1;
@@ -1000,23 +1017,30 @@ void RunMatchingBenchmarks(const std::vector<Benchmark::Instance>& benchmarks,
// Keep track of running times of all instances of the current benchmark
std::vector<BenchmarkReporter::Run> complexity_reports;
if (reporter->ReportContext(context)) {
if (console_reporter->ReportContext(context)
&& (!file_reporter || file_reporter->ReportContext(context))) {
for (const auto& benchmark : benchmarks) {
RunBenchmark(benchmark, reporter, complexity_reports);
std::vector<BenchmarkReporter::Run> reports =
RunBenchmark(benchmark, &complexity_reports);
console_reporter->ReportRuns(reports);
if (file_reporter) file_reporter->ReportRuns(reports);
}
}
console_reporter->Finalize();
if (file_reporter) file_reporter->Finalize();
}
std::unique_ptr<BenchmarkReporter> GetDefaultReporter() {
std::unique_ptr<BenchmarkReporter>
CreateReporter(std::string const& name, ConsoleReporter::OutputOptions allow_color) {
typedef std::unique_ptr<BenchmarkReporter> PtrType;
if (FLAGS_benchmark_format == "console") {
return PtrType(new ConsoleReporter);
} else if (FLAGS_benchmark_format == "json") {
if (name == "console") {
return PtrType(new ConsoleReporter(allow_color));
} else if (name == "json") {
return PtrType(new JSONReporter);
} else if (FLAGS_benchmark_format == "csv") {
} else if (name == "csv") {
return PtrType(new CSVReporter);
} else {
std::cerr << "Unexpected format: '" << FLAGS_benchmark_format << "'\n";
std::cerr << "Unexpected format: '" << name << "'\n";
std::exit(1);
}
}
@@ -1025,10 +1049,17 @@ std::unique_ptr<BenchmarkReporter> GetDefaultReporter() {
} // end namespace internal
size_t RunSpecifiedBenchmarks() {
return RunSpecifiedBenchmarks(nullptr);
return RunSpecifiedBenchmarks(nullptr, nullptr);
}
size_t RunSpecifiedBenchmarks(BenchmarkReporter* reporter) {
size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter) {
return RunSpecifiedBenchmarks(console_reporter, nullptr);
}
size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter,
BenchmarkReporter* file_reporter) {
std::string spec = FLAGS_benchmark_filter;
if (spec.empty() || spec == "all")
spec = "."; // Regexp that matches all benchmarks
@@ -1041,13 +1072,38 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* reporter) {
for (auto const& benchmark : benchmarks)
std::cout << benchmark.name << "\n";
} else {
std::unique_ptr<BenchmarkReporter> default_reporter;
if (!reporter) {
default_reporter = internal::GetDefaultReporter();
reporter = default_reporter.get();
// Setup the reporters
std::ofstream output_file;
std::unique_ptr<BenchmarkReporter> default_console_reporter;
std::unique_ptr<BenchmarkReporter> default_file_reporter;
if (!console_reporter) {
auto output_opts = FLAGS_color_print ? ConsoleReporter::OO_Color
: ConsoleReporter::OO_None;
default_console_reporter = internal::CreateReporter(
FLAGS_benchmark_format, output_opts);
console_reporter = default_console_reporter.get();
}
internal::RunMatchingBenchmarks(benchmarks, reporter);
reporter->Finalize();
std::string const& fname = FLAGS_benchmark_out;
if (fname == "" && file_reporter) {
std::cerr << "A custom file reporter was provided but "
"--benchmark_out=<file> was not specified." << std::endl;
std::exit(1);
}
if (fname != "") {
output_file.open(fname);
if (!output_file.is_open()) {
std::cerr << "invalid file name: '" << fname << std::endl;
std::exit(1);
}
if (!file_reporter) {
default_file_reporter = internal::CreateReporter(
FLAGS_benchmark_out_format, ConsoleReporter::OO_None);
file_reporter = default_file_reporter.get();
}
file_reporter->SetOutputStream(&output_file);
file_reporter->SetErrorStream(&output_file);
}
internal::RunMatchingBenchmarks(benchmarks, console_reporter, file_reporter);
}
return benchmarks.size();
}
@@ -1062,6 +1118,8 @@ void PrintUsageAndExit() {
" [--benchmark_min_time=<min_time>]\n"
" [--benchmark_repetitions=<num_repetitions>]\n"
" [--benchmark_format=<console|json|csv>]\n"
" [--benchmark_out=<filename>]\n"
" [--benchmark_out_format=<json|console|csv>]\n"
" [--color_print={true|false}]\n"
" [--v=<verbosity>]\n");
exit(0);
@@ -1081,6 +1139,10 @@ void ParseCommandLineFlags(int* argc, char** argv) {
&FLAGS_benchmark_repetitions) ||
ParseStringFlag(argv[i], "benchmark_format",
&FLAGS_benchmark_format) ||
ParseStringFlag(argv[i], "benchmark_out",
&FLAGS_benchmark_out) ||
ParseStringFlag(argv[i], "benchmark_out_format",
&FLAGS_benchmark_out_format) ||
ParseBoolFlag(argv[i], "color_print",
&FLAGS_color_print) ||
ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
@@ -1092,10 +1154,9 @@ void ParseCommandLineFlags(int* argc, char** argv) {
PrintUsageAndExit();
}
}
if (FLAGS_benchmark_format != "console" &&
FLAGS_benchmark_format != "json" &&
FLAGS_benchmark_format != "csv") {
for (auto const* flag : {&FLAGS_benchmark_format,
&FLAGS_benchmark_out_format})
if (*flag != "console" && *flag != "json" && *flag != "csv") {
PrintUsageAndExit();
}
}


@@ -20,7 +20,6 @@
#include <string>
#include <memory>
#include "commandlineflags.h"
#include "check.h"
#include "internal_macros.h"
@@ -28,8 +27,6 @@
#include <Windows.h>
#endif
DECLARE_bool(color_print);
namespace benchmark {
namespace {
#ifdef BENCHMARK_OS_WINDOWS
@@ -120,14 +117,14 @@ std::string FormatString(const char *msg, ...) {
void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...) {
va_list args;
va_start(args, fmt);
if (!FLAGS_color_print) {
out << FormatString(fmt, args);
ColorPrintf(out, color, fmt, args);
va_end(args);
return;
}
void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, va_list args) {
#ifdef BENCHMARK_OS_WINDOWS
((void)out); // suppress unused warning
const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE);
// Gets the current text color.
@@ -152,7 +149,6 @@ void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...) {
out << FormatString(fmt, args) << "\033[m";
#endif
va_end(args);
}
} // end namespace benchmark


@@ -20,6 +20,7 @@ enum LogColor {
std::string FormatString(const char* msg, va_list args);
std::string FormatString(const char* msg, ...);
void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, va_list args);
void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...);
} // end namespace benchmark


@@ -31,9 +31,9 @@ BigOFunc* FittingCurve(BigO complexity) {
case oN:
return [](int n) -> double { return n; };
case oNSquared:
return [](int n) -> double { return n * n; };
return [](int n) -> double { return std::pow(n, 2); };
case oNCubed:
return [](int n) -> double { return n * n * n; };
return [](int n) -> double { return std::pow(n, 3); };
case oLogN:
return [](int n) { return std::log2(n); };
case oNLogN:


@@ -30,8 +30,6 @@
#include "string_util.h"
#include "walltime.h"
DECLARE_bool(color_print);
namespace benchmark {
bool ConsoleReporter::ReportContext(const Context& context) {
@@ -40,10 +38,10 @@ bool ConsoleReporter::ReportContext(const Context& context) {
PrintBasicContext(&GetErrorStream(), context);
#ifdef BENCHMARK_OS_WINDOWS
if (FLAGS_color_print && &std::cout != &GetOutputStream()) {
if (color_output_ && &std::cout != &GetOutputStream()) {
GetErrorStream() << "Color printing is only supported for stdout on windows."
" Disabling color printing\n";
FLAGS_color_print = false;
color_output_ = false;
}
#endif
std::string str = FormatString("%-*s %13s %13s %10s\n",
@@ -59,18 +57,29 @@ void ConsoleReporter::ReportRuns(const std::vector<Run>& reports) {
PrintRunData(run);
}
void ConsoleReporter::PrintRunData(const Run& result) {
auto& Out = GetOutputStream();
static void IgnoreColorPrint(std::ostream& out, LogColor,
const char* fmt, ...)
{
va_list args;
va_start(args, fmt);
out << FormatString(fmt, args);
va_end(args);
}
void ConsoleReporter::PrintRunData(const Run& result) {
typedef void(PrinterFn)(std::ostream&, LogColor, const char*, ...);
auto& Out = GetOutputStream();
PrinterFn* printer = color_output_ ? (PrinterFn*)ColorPrintf
: IgnoreColorPrint;
auto name_color =
(result.report_big_o || result.report_rms) ? COLOR_BLUE : COLOR_GREEN;
ColorPrintf(Out, name_color, "%-*s ", name_field_width_,
printer(Out, name_color, "%-*s ", name_field_width_,
result.benchmark_name.c_str());
if (result.error_occurred) {
ColorPrintf(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'",
printer(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'",
result.error_message.c_str());
ColorPrintf(Out, COLOR_DEFAULT, "\n");
printer(Out, COLOR_DEFAULT, "\n");
return;
}
// Format bytes per second
@@ -91,34 +100,34 @@ void ConsoleReporter::PrintRunData(const Run& result) {
if (result.report_big_o) {
std::string big_o = GetBigOString(result.complexity);
ColorPrintf(Out, COLOR_YELLOW, "%10.2f %s %10.2f %s ", real_time,
printer(Out, COLOR_YELLOW, "%10.2f %s %10.2f %s ", real_time,
big_o.c_str(), cpu_time, big_o.c_str());
} else if (result.report_rms) {
ColorPrintf(Out, COLOR_YELLOW, "%10.0f %% %10.0f %% ", real_time * 100,
printer(Out, COLOR_YELLOW, "%10.0f %% %10.0f %% ", real_time * 100,
cpu_time * 100);
} else {
const char* timeLabel = GetTimeUnitString(result.time_unit);
ColorPrintf(Out, COLOR_YELLOW, "%10.0f %s %10.0f %s ", real_time, timeLabel,
printer(Out, COLOR_YELLOW, "%10.0f %s %10.0f %s ", real_time, timeLabel,
cpu_time, timeLabel);
}
if (!result.report_big_o && !result.report_rms) {
ColorPrintf(Out, COLOR_CYAN, "%10lld", result.iterations);
printer(Out, COLOR_CYAN, "%10lld", result.iterations);
}
if (!rate.empty()) {
ColorPrintf(Out, COLOR_DEFAULT, " %*s", 13, rate.c_str());
printer(Out, COLOR_DEFAULT, " %*s", 13, rate.c_str());
}
if (!items.empty()) {
ColorPrintf(Out, COLOR_DEFAULT, " %*s", 18, items.c_str());
printer(Out, COLOR_DEFAULT, " %*s", 18, items.c_str());
}
if (!result.report_label.empty()) {
ColorPrintf(Out, COLOR_DEFAULT, " %s", result.report_label.c_str());
printer(Out, COLOR_DEFAULT, " %s", result.report_label.c_str());
}
ColorPrintf(Out, COLOR_DEFAULT, "\n");
printer(Out, COLOR_DEFAULT, "\n");
}
} // end namespace benchmark


@@ -113,11 +113,11 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
uint32_t pmuseren;
uint32_t pmcntenset;
// Read the user mode perf monitor counter access permissions.
asm("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren));
asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren));
if (pmuseren & 1) { // Allows reading perfmon counters for user mode code.
asm("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset));
asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset));
if (pmcntenset & 0x80000000ul) { // Is it counting?
asm("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr));
asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr));
// The counter is set up to count every 64th cycle
return static_cast<int64_t>(pmccntr) * 64; // Should optimize to << 6
}


@@ -239,6 +239,7 @@ void InitializeSystemInfo() {
}
// TODO: also figure out cpuinfo_num_cpus
#elif defined BENCHMARK_OS_WINDOWS
// In NT, read MHz from the registry. If we fail to do so or we're in win9x
// then make a crude estimate.
@@ -251,7 +252,12 @@ void InitializeSystemInfo() {
cpuinfo_cycles_per_second = static_cast<double>((int64_t)data * (int64_t)(1000 * 1000)); // was mhz
else
cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
// TODO: also figure out cpuinfo_num_cpus
SYSTEM_INFO sysinfo;
// Use memset as opposed to = {} to avoid GCC missing initializer false positives.
std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO));
GetSystemInfo(&sysinfo);
cpuinfo_num_cpus = sysinfo.dwNumberOfProcessors; // number of logical processors in the current group
#elif defined BENCHMARK_OS_MACOSX
// returning "mach time units" per second. the current number of elapsed


@@ -45,9 +45,15 @@ add_test(donotoptimize_test donotoptimize_test --benchmark_min_time=0.01)
compile_benchmark_test(fixture_test)
add_test(fixture_test fixture_test --benchmark_min_time=0.01)
compile_benchmark_test(register_benchmark_test)
add_test(register_benchmark_test register_benchmark_test --benchmark_min_time=0.01)
compile_benchmark_test(map_test)
add_test(map_test map_test --benchmark_min_time=0.01)
compile_benchmark_test(multiple_ranges_test)
add_test(multiple_ranges_test multiple_ranges_test --benchmark_min_time=0.01)
compile_benchmark_test(reporter_output_test)
add_test(reporter_output_test reporter_output_test --benchmark_min_time=0.01)


@@ -14,7 +14,7 @@ BENCHMARK(BM_empty)->ThreadPerCpu();
void BM_spin_empty(benchmark::State& state) {
while (state.KeepRunning()) {
for (int x = 0; x < state.range_x(); ++x) {
for (int x = 0; x < state.range(0); ++x) {
benchmark::DoNotOptimize(x);
}
}
@@ -23,11 +23,11 @@ BASIC_BENCHMARK_TEST(BM_spin_empty);
BASIC_BENCHMARK_TEST(BM_spin_empty)->ThreadPerCpu();
void BM_spin_pause_before(benchmark::State& state) {
for (int i = 0; i < state.range_x(); ++i) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
while(state.KeepRunning()) {
for (int i = 0; i < state.range_x(); ++i) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
}
@@ -39,11 +39,11 @@ BASIC_BENCHMARK_TEST(BM_spin_pause_before)->ThreadPerCpu();
void BM_spin_pause_during(benchmark::State& state) {
while(state.KeepRunning()) {
state.PauseTiming();
for (int i = 0; i < state.range_x(); ++i) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
state.ResumeTiming();
for (int i = 0; i < state.range_x(); ++i) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
}
@@ -64,11 +64,11 @@ BENCHMARK(BM_pause_during)->UseRealTime()->ThreadPerCpu();
void BM_spin_pause_after(benchmark::State& state) {
while(state.KeepRunning()) {
for (int i = 0; i < state.range_x(); ++i) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
}
for (int i = 0; i < state.range_x(); ++i) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
}
@@ -77,15 +77,15 @@ BASIC_BENCHMARK_TEST(BM_spin_pause_after)->ThreadPerCpu();
void BM_spin_pause_before_and_after(benchmark::State& state) {
for (int i = 0; i < state.range_x(); ++i) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
while(state.KeepRunning()) {
for (int i = 0; i < state.range_x(); ++i) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
}
for (int i = 0; i < state.range_x(); ++i) {
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
}


@@ -67,7 +67,7 @@ BENCHMARK(BM_Factorial)->UseRealTime();
static void BM_CalculatePiRange(benchmark::State& state) {
double pi = 0.0;
while (state.KeepRunning())
pi = CalculatePi(state.range_x());
pi = CalculatePi(state.range(0));
std::stringstream ss;
ss << pi;
state.SetLabel(ss.str());
@@ -87,25 +87,25 @@ BENCHMARK(BM_CalculatePi)->ThreadPerCpu();
static void BM_SetInsert(benchmark::State& state) {
while (state.KeepRunning()) {
state.PauseTiming();
std::set<int> data = ConstructRandomSet(state.range_x());
std::set<int> data = ConstructRandomSet(state.range(0));
state.ResumeTiming();
for (int j = 0; j < state.range_y(); ++j)
for (int j = 0; j < state.range(1); ++j)
data.insert(rand());
}
state.SetItemsProcessed(state.iterations() * state.range_y());
state.SetBytesProcessed(state.iterations() * state.range_y() * sizeof(int));
state.SetItemsProcessed(state.iterations() * state.range(1));
state.SetBytesProcessed(state.iterations() * state.range(1) * sizeof(int));
}
BENCHMARK(BM_SetInsert)->RangePair(1<<10,8<<10, 1,10);
BENCHMARK(BM_SetInsert)->Ranges({{1<<10,8<<10}, {1,10}});
template<typename Container, typename ValueType = typename Container::value_type>
static void BM_Sequential(benchmark::State& state) {
ValueType v = 42;
while (state.KeepRunning()) {
Container c;
for (int i = state.range_x(); --i; )
for (int i = state.range(0); --i; )
c.push_back(v);
}
const size_t items_processed = state.iterations() * state.range_x();
const size_t items_processed = state.iterations() * state.range(0);
state.SetItemsProcessed(items_processed);
state.SetBytesProcessed(items_processed * sizeof(v));
}
@@ -117,8 +117,8 @@ BENCHMARK_TEMPLATE(BM_Sequential, std::vector<int>, int)->Arg(512);
#endif
static void BM_StringCompare(benchmark::State& state) {
std::string s1(state.range_x(), '-');
std::string s2(state.range_x(), '-');
std::string s1(state.range(0), '-');
std::string s2(state.range(0), '-');
while (state.KeepRunning())
benchmark::DoNotOptimize(s1.compare(s2));
}
@@ -147,14 +147,14 @@ BENCHMARK(BM_SetupTeardown)->ThreadPerCpu();
static void BM_LongTest(benchmark::State& state) {
double tracker = 0.0;
while (state.KeepRunning()) {
for (int i = 0; i < state.range_x(); ++i)
for (int i = 0; i < state.range(0); ++i)
benchmark::DoNotOptimize(tracker += i);
}
}
BENCHMARK(BM_LongTest)->Range(1<<16,1<<28);
static void BM_ParallelMemset(benchmark::State& state) {
int size = state.range_x() / sizeof(int);
int size = state.range(0) / sizeof(int);
int thread_size = size / state.threads;
int from = thread_size * state.thread_index;
int to = from + thread_size;
@@ -179,7 +179,7 @@ BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4);
static void BM_ManualTiming(benchmark::State& state) {
size_t slept_for = 0;
int microseconds = state.range_x();
int microseconds = state.range(0);
std::chrono::duration<double, std::micro> sleep_duration {
static_cast<double>(microseconds)
};


@@ -36,18 +36,27 @@ struct TestCase {
CHECK(err_str.empty()) << "Could not construct regex \"" << regex << "\""
<< " got Error: " << err_str;
std::string near = "<EOF>";
std::string line;
bool first = true;
while (remaining_output.eof() == false) {
CHECK(remaining_output.good());
std::getline(remaining_output, line);
// Keep the first line as context.
if (first) {
near = line;
first = false;
}
if (r.Match(line)) return;
CHECK(match_rule != MR_Next) << "Expected line \"" << line
<< "\" to match regex \"" << regex << "\"";
<< "\" to match regex \"" << regex << "\""
<< "\nstarted matching at line: \"" << near << "\"";
}
CHECK(remaining_output.eof() == false)
<< "End of output reached before match for regex \"" << regex
<< "\" was found";
<< "\" was found"
<< "\nstarted matching at line: \"" << near << "\"";
}
};
@@ -112,7 +121,7 @@ std::string join(First f, Args&&... args) {
return std::string(std::move(f)) + "[ ]+" + join(std::forward<Args>(args)...);
}
std::string dec_re = "[0-9]+\\.[0-9]+";
std::string dec_re = "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?";
#define ADD_COMPLEXITY_CASES(...) \
int CONCAT(dummy, __LINE__) = AddComplexityTest(__VA_ARGS__)
@@ -138,7 +147,7 @@ int AddComplexityTest(std::vector<TestCase>* console_out, std::vector<TestCase>*
});
AddCases(csv_out, {
{"^\"" + big_o_test_name + "\",," + dec_re + "," + dec_re + "," + big_o + ",,,,,$"},
{"^\"" + rms_test_name + "\",," + dec_re + "," + dec_re + ",,,,,,$"}
{"^\"" + rms_test_name + "\",," + dec_re + "," + dec_re + ",,,,,,$", MR_Next}
});
return 0;
}
@@ -151,12 +160,15 @@ int AddComplexityTest(std::vector<TestCase>* console_out, std::vector<TestCase>*
void BM_Complexity_O1(benchmark::State& state) {
while (state.KeepRunning()) {
for (int i=0; i < 1024; ++i) {
benchmark::DoNotOptimize(&i);
}
state.SetComplexityN(state.range_x());
}
state.SetComplexityN(state.range(0));
}
BENCHMARK(BM_Complexity_O1) -> Range(1, 1<<18) -> Complexity(benchmark::o1);
BENCHMARK(BM_Complexity_O1) -> Range(1, 1<<18) -> Complexity([](int){return 1.0; });
BENCHMARK(BM_Complexity_O1) -> Range(1, 1<<18) -> Complexity();
BENCHMARK(BM_Complexity_O1) -> Range(1, 1<<18) -> Complexity([](int){return 1.0; });
const char* big_o_1_test_name = "BM_Complexity_O1_BigO";
const char* rms_o_1_test_name = "BM_Complexity_O1_RMS";
@@ -167,6 +179,10 @@ const char* lambda_big_o_1 = "f\\(N\\)";
ADD_COMPLEXITY_CASES(&ConsoleOutputTests, &JSONOutputTests, &CSVOutputTests,
big_o_1_test_name, rms_o_1_test_name, enum_auto_big_o_1);
// Add auto enum tests
ADD_COMPLEXITY_CASES(&ConsoleOutputTests, &JSONOutputTests, &CSVOutputTests,
big_o_1_test_name, rms_o_1_test_name, enum_auto_big_o_1);
// Add lambda tests
ADD_COMPLEXITY_CASES(&ConsoleOutputTests, &JSONOutputTests, &CSVOutputTests,
big_o_1_test_name, rms_o_1_test_name, lambda_big_o_1);
@@ -185,12 +201,12 @@ std::vector<int> ConstructRandomVector(int size) {
}
void BM_Complexity_O_N(benchmark::State& state) {
auto v = ConstructRandomVector(state.range_x());
const int item_not_in_vector = state.range_x()*2; // Test worst case scenario (item not in vector)
auto v = ConstructRandomVector(state.range(0));
const int item_not_in_vector = state.range(0)*2; // Test worst case scenario (item not in vector)
while (state.KeepRunning()) {
benchmark::DoNotOptimize(std::find(v.begin(), v.end(), item_not_in_vector));
}
state.SetComplexityN(state.range_x());
state.SetComplexityN(state.range(0));
}
BENCHMARK(BM_Complexity_O_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity(benchmark::oN);
BENCHMARK(BM_Complexity_O_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity([](int n) -> double{return n; });
@@ -214,11 +230,11 @@ ADD_COMPLEXITY_CASES(&ConsoleOutputTests, &JSONOutputTests, &CSVOutputTests,
// ========================================================================= //
static void BM_Complexity_O_N_log_N(benchmark::State& state) {
auto v = ConstructRandomVector(state.range_x());
auto v = ConstructRandomVector(state.range(0));
while (state.KeepRunning()) {
std::sort(v.begin(), v.end());
}
state.SetComplexityN(state.range_x());
state.SetComplexityN(state.range(0));
}
BENCHMARK(BM_Complexity_O_N_log_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity(benchmark::oNLogN);
BENCHMARK(BM_Complexity_O_N_log_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity([](int n) {return n * std::log2(n); });
@@ -244,14 +260,8 @@ ADD_COMPLEXITY_CASES(&ConsoleOutputTests, &JSONOutputTests, &CSVOutputTests,
int main(int argc, char* argv[]) {
// Add --color_print=false to argv since we don't want to match color codes.
char new_arg[64];
char* new_argv[64];
std::copy(argv, argv + argc, new_argv);
new_argv[argc++] = std::strcpy(new_arg, "--color_print=false");
benchmark::Initialize(&argc, new_argv);
benchmark::ConsoleReporter CR;
benchmark::Initialize(&argc, argv);
benchmark::ConsoleReporter CR(benchmark::ConsoleReporter::OO_None);
benchmark::JSONReporter JR;
benchmark::CSVReporter CSVR;
struct ReporterTest {


@@ -44,7 +44,7 @@ BENCHMARK_DEFINE_F(MyFixture, Bar)(benchmark::State& st) {
assert(data.get() != nullptr);
assert(*data == 42);
}
st.SetItemsProcessed(st.range_x());
st.SetItemsProcessed(st.range(0));
}
BENCHMARK_REGISTER_F(MyFixture, Bar)->Arg(42);
BENCHMARK_REGISTER_F(MyFixture, Bar)->Arg(42)->ThreadPerCpu();


@@ -17,7 +17,7 @@ std::map<int, int> ConstructRandomMap(int size) {
// Basic version.
static void BM_MapLookup(benchmark::State& state) {
const int size = state.range_x();
const int size = state.range(0);
while (state.KeepRunning()) {
state.PauseTiming();
std::map<int, int> m = ConstructRandomMap(size);
@@ -34,7 +34,7 @@ BENCHMARK(BM_MapLookup)->Range(1 << 3, 1 << 12);
class MapFixture : public ::benchmark::Fixture {
public:
void SetUp(const ::benchmark::State& st) {
m = ConstructRandomMap(st.range_x());
m = ConstructRandomMap(st.range(0));
}
void TearDown(const ::benchmark::State&) {
@@ -45,7 +45,7 @@ class MapFixture : public ::benchmark::Fixture {
};
BENCHMARK_DEFINE_F(MapFixture, Lookup)(benchmark::State& state) {
const int size = state.range_x();
const int size = state.range(0);
while (state.KeepRunning()) {
for (int i = 0; i < size; ++i) {
benchmark::DoNotOptimize(m.find(rand() % size));

View File

@@ -0,0 +1,46 @@
#include "benchmark/benchmark.h"
#include <set>
#include <cassert>
class MultipleRangesFixture : public ::benchmark::Fixture {
public:
MultipleRangesFixture()
: expectedValues({
{1, 3, 5}, {1, 3, 8}, {1, 3, 15}, {2, 3, 5}, {2, 3, 8}, {2, 3, 15},
{1, 4, 5}, {1, 4, 8}, {1, 4, 15}, {2, 4, 5}, {2, 4, 8}, {2, 4, 15},
{1, 7, 5}, {1, 7, 8}, {1, 7, 15}, {2, 7, 5}, {2, 7, 8}, {2, 7, 15},
{7, 6, 3}
})
{
}
void SetUp(const ::benchmark::State& state) {
std::vector<int> ranges = {state.range(0), state.range(1), state.range(2)};
assert(expectedValues.find(ranges) != expectedValues.end());
actualValues.insert(ranges);
}
virtual ~MultipleRangesFixture() {
assert(actualValues.size() == expectedValues.size());
}
std::set<std::vector<int>> expectedValues;
std::set<std::vector<int>> actualValues;
};
BENCHMARK_DEFINE_F(MultipleRangesFixture, Empty)(benchmark::State& state) {
while (state.KeepRunning()) {
int product = state.range(0) * state.range(1) * state.range(2);
for (int x = 0; x < product; x++) {
benchmark::DoNotOptimize(x);
}
}
}
BENCHMARK_REGISTER_F(MultipleRangesFixture, Empty)->RangeMultiplier(2)->Ranges({{1, 2}, {3, 7}, {5, 15}})->Args({7, 6, 3});
BENCHMARK_MAIN()

View File

@@ -9,7 +9,7 @@ void BM_basic(benchmark::State& state) {
}
void BM_basic_slow(benchmark::State& state) {
std::chrono::milliseconds sleep_duration(state.range_x());
std::chrono::milliseconds sleep_duration(state.range(0));
while (state.KeepRunning()) {
std::this_thread::sleep_for(
std::chrono::duration_cast<std::chrono::nanoseconds>(sleep_duration)
@@ -25,8 +25,8 @@ BENCHMARK(BM_basic_slow)->Arg(1000)->Unit(benchmark::kMillisecond);
BENCHMARK(BM_basic)->Range(1, 8);
BENCHMARK(BM_basic)->RangeMultiplier(2)->Range(1, 8);
BENCHMARK(BM_basic)->DenseRange(10, 15);
BENCHMARK(BM_basic)->ArgPair(42, 42);
BENCHMARK(BM_basic)->RangePair(64, 512, 64, 512);
BENCHMARK(BM_basic)->Args({42, 42});
BENCHMARK(BM_basic)->Ranges({{64, 512}, {64, 512}});
BENCHMARK(BM_basic)->MinTime(0.7);
BENCHMARK(BM_basic)->UseRealTime();
BENCHMARK(BM_basic)->ThreadRange(2, 4);

View File

@@ -0,0 +1,149 @@
#undef NDEBUG
#include "benchmark/benchmark.h"
#include "../src/check.h" // NOTE: check.h is for internal use only!
#include <cassert>
#include <vector>
namespace {
class TestReporter : public benchmark::ConsoleReporter {
public:
virtual void ReportRuns(const std::vector<Run>& report) {
all_runs_.insert(all_runs_.end(), begin(report), end(report));
ConsoleReporter::ReportRuns(report);
}
std::vector<Run> all_runs_;
};
struct TestCase {
std::string name;
const char* label;
TestCase(const char* xname) : name(xname), label(nullptr) {}
TestCase(const char* xname, const char* xlabel)
: name(xname), label(xlabel) {}
typedef benchmark::BenchmarkReporter::Run Run;
void CheckRun(Run const& run) const {
CHECK(name == run.benchmark_name) << "expected " << name
<< " got " << run.benchmark_name;
if (label) {
CHECK(run.report_label == label) << "expected " << label
<< " got " << run.report_label;
} else {
CHECK(run.report_label == "");
}
}
};
std::vector<TestCase> ExpectedResults;
int AddCases(std::initializer_list<TestCase> const& v) {
for (auto N : v) {
ExpectedResults.push_back(N);
}
return 0;
}
#define CONCAT(x, y) CONCAT2(x, y)
#define CONCAT2(x, y) x##y
#define ADD_CASES(...) \
int CONCAT(dummy, __LINE__) = AddCases({__VA_ARGS__})
} // end namespace
typedef benchmark::internal::Benchmark* ReturnVal;
//----------------------------------------------------------------------------//
// Test RegisterBenchmark with no additional arguments
//----------------------------------------------------------------------------//
void BM_function(benchmark::State& state) { while (state.KeepRunning()) {} }
BENCHMARK(BM_function);
ReturnVal dummy = benchmark::RegisterBenchmark(
"BM_function_manual_registration",
BM_function);
ADD_CASES({"BM_function"}, {"BM_function_manual_registration"});
//----------------------------------------------------------------------------//
// Test RegisterBenchmark with additional arguments
// Note: GCC <= 4.8 does not support this form of RegisterBenchmark because it
// rejects the variadic pack expansion of lambda captures.
//----------------------------------------------------------------------------//
#ifndef BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
void BM_extra_args(benchmark::State& st, const char* label) {
while (st.KeepRunning()) {}
st.SetLabel(label);
}
int RegisterFromFunction() {
std::pair<const char*, const char*> cases[] = {
{"test1", "One"},
{"test2", "Two"},
{"test3", "Three"}
};
for (auto& c : cases)
benchmark::RegisterBenchmark(c.first, &BM_extra_args, c.second);
return 0;
}
int dummy2 = RegisterFromFunction();
ADD_CASES(
{"test1", "One"},
{"test2", "Two"},
{"test3", "Three"}
);
#endif // BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
//----------------------------------------------------------------------------//
// Test RegisterBenchmark with different callable types
//----------------------------------------------------------------------------//
struct CustomFixture {
void operator()(benchmark::State& st) {
while (st.KeepRunning()) {}
}
};
void TestRegistrationAtRuntime() {
#ifdef BENCHMARK_HAS_CXX11
{
CustomFixture fx;
benchmark::RegisterBenchmark("custom_fixture", fx);
AddCases({"custom_fixture"});
}
#endif
#ifndef BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
{
int x = 42;
auto capturing_lam = [=](benchmark::State& st) {
while (st.KeepRunning()) {}
st.SetLabel(std::to_string(x));
};
benchmark::RegisterBenchmark("lambda_benchmark", capturing_lam);
AddCases({{"lambda_benchmark", "42"}});
}
#endif
}
int main(int argc, char* argv[]) {
TestRegistrationAtRuntime();
benchmark::Initialize(&argc, argv);
TestReporter test_reporter;
benchmark::RunSpecifiedBenchmarks(&test_reporter);
typedef benchmark::BenchmarkReporter::Run Run;
auto EB = ExpectedResults.begin();
for (Run const& run : test_reporter.all_runs_) {
assert(EB != ExpectedResults.end());
EB->CheckRun(run);
++EB;
}
assert(EB == ExpectedResults.end());
return 0;
}

View File

@@ -114,7 +114,9 @@ std::string join(First f, Args&&... args) {
return std::string(std::move(f)) + "[ ]+" + join(std::forward<Args>(args)...);
}
std::string dec_re = "[0-9]+\\.[0-9]+";
std::string dec_re = "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?";
} // end namespace
@@ -185,7 +187,7 @@ ADD_CASES(&CSVOutputTests, {
void BM_Complexity_O1(benchmark::State& state) {
while (state.KeepRunning()) {
}
state.SetComplexityN(state.range_x());
state.SetComplexityN(state.range(0));
}
BENCHMARK(BM_Complexity_O1)->Range(1, 1<<18)->Complexity(benchmark::o1);
@@ -203,14 +205,8 @@ ADD_CASES(&ConsoleOutputTests, {
int main(int argc, char* argv[]) {
// Add --color_print=false to argv since we don't want to match color codes.
char new_arg[64];
char* new_argv[64];
std::copy(argv, argv + argc, new_argv);
new_argv[argc++] = std::strcpy(new_arg, "--color_print=false");
benchmark::Initialize(&argc, new_argv);
benchmark::ConsoleReporter CR;
benchmark::Initialize(&argc, argv);
benchmark::ConsoleReporter CR(benchmark::ConsoleReporter::OO_None);
benchmark::JSONReporter JR;
benchmark::CSVReporter CSVR;
struct ReporterTest {

View File

@@ -74,7 +74,7 @@ ADD_CASES("BM_error_before_running",
void BM_error_during_running(benchmark::State& state) {
bool first_iter = true;
while (state.KeepRunning()) {
if (state.range_x() == 1 && state.thread_index <= (state.threads / 2)) {
if (state.range(0) == 1 && state.thread_index <= (state.threads / 2)) {
assert(first_iter);
first_iter = false;
state.SkipWithError("error message");
@@ -116,7 +116,7 @@ ADD_CASES(
void BM_error_while_paused(benchmark::State& state) {
bool first_iter = true;
while (state.KeepRunning()) {
if (state.range_x() == 1 && state.thread_index <= (state.threads / 2)) {
if (state.range(0) == 1 && state.thread_index <= (state.threads / 2)) {
assert(first_iter);
first_iter = false;
state.PauseTiming();

View File

@@ -0,0 +1,30 @@
#!/usr/bin/env python
"""
compare_bench.py - Compare two benchmarks or their results and report the
difference.
"""
import sys
import gbench
from gbench import util, report
def main():
# Parse the command line flags
def usage():
print('compare_bench.py <test1> <test2> [benchmark options]...')
exit(1)
if '--help' in sys.argv or len(sys.argv) < 3:
usage()
tests = sys.argv[1:3]
bench_opts = list(sys.argv[3:])
# Run the benchmarks and report the results
json1 = gbench.util.run_or_load_benchmark(tests[0], bench_opts)
json2 = gbench.util.run_or_load_benchmark(tests[1], bench_opts)
output_lines = gbench.report.generate_difference_report(json1, json2)
print('Comparing %s to %s' % (tests[0], tests[1]))
for ln in output_lines:
print(ln)
if __name__ == '__main__':
main()
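Because everything after the first two positional arguments is forwarded to both benchmark binaries, a comparison can be narrowed or tuned without editing the script; for example (the binary names and filter expression here are only illustrative):

$ compare_bench.py ./bench_a.out ./bench_b.out --benchmark_filter=BM_Insert.*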

View File

@@ -0,0 +1,46 @@
{
"context": {
"date": "2016-08-02 17:44:46",
"num_cpus": 4,
"mhz_per_cpu": 4228,
"cpu_scaling_enabled": false,
"library_build_type": "release"
},
"benchmarks": [
{
"name": "BM_SameTimes",
"iterations": 1000,
"real_time": 10,
"cpu_time": 10,
"time_unit": "ns"
},
{
"name": "BM_2xFaster",
"iterations": 1000,
"real_time": 50,
"cpu_time": 50,
"time_unit": "ns"
},
{
"name": "BM_2xSlower",
"iterations": 1000,
"real_time": 50,
"cpu_time": 50,
"time_unit": "ns"
},
{
"name": "BM_10PercentFaster",
"iterations": 1000,
"real_time": 100,
"cpu_time": 100,
"time_unit": "ns"
},
{
"name": "BM_10PercentSlower",
"iterations": 1000,
"real_time": 100,
"cpu_time": 100,
"time_unit": "ns"
}
]
}

View File

@@ -0,0 +1,46 @@
{
"context": {
"date": "2016-08-02 17:44:46",
"num_cpus": 4,
"mhz_per_cpu": 4228,
"cpu_scaling_enabled": false,
"library_build_type": "release"
},
"benchmarks": [
{
"name": "BM_SameTimes",
"iterations": 1000,
"real_time": 10,
"cpu_time": 10,
"time_unit": "ns"
},
{
"name": "BM_2xFaster",
"iterations": 1000,
"real_time": 25,
"cpu_time": 25,
"time_unit": "ns"
},
{
"name": "BM_2xSlower",
"iterations": 20833333,
"real_time": 100,
"cpu_time": 100,
"time_unit": "ns"
},
{
"name": "BM_10PercentFaster",
"iterations": 1000,
"real_time": 90,
"cpu_time": 90,
"time_unit": "ns"
},
{
"name": "BM_10PercentSlower",
"iterations": 1000,
"real_time": 110,
"cpu_time": 110,
"time_unit": "ns"
}
]
}

View File

@@ -0,0 +1,8 @@
"""Google Benchmark tooling"""
__author__ = 'Eric Fiselier'
__email__ = 'eric@efcs.ca'
__versioninfo__ = (0, 5, 0)
__version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev'
__all__ = []

View File

@@ -0,0 +1,136 @@
"""report.py - Utilities for reporting statistics about benchmark results
"""
import os
class BenchmarkColor(object):
def __init__(self, name, code):
self.name = name
self.code = code
def __repr__(self):
return '%s%r' % (self.__class__.__name__,
(self.name, self.code))
def __format__(self, format):
return self.code
# Benchmark Colors Enumeration
BC_NONE = BenchmarkColor('NONE', '')
BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m')
BC_CYAN = BenchmarkColor('CYAN', '\033[96m')
BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m')
BC_HEADER = BenchmarkColor('HEADER', '\033[92m')
BC_WARNING = BenchmarkColor('WARNING', '\033[93m')
BC_WHITE = BenchmarkColor('WHITE', '\033[97m')
BC_FAIL = BenchmarkColor('FAIL', '\033[91m')
BC_ENDC = BenchmarkColor('ENDC', '\033[0m')
BC_BOLD = BenchmarkColor('BOLD', '\033[1m')
BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m')
def color_format(use_color, fmt_str, *args, **kwargs):
"""
Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
is False then all color codes in 'args' and 'kwargs' are replaced with
the empty string.
"""
assert use_color is True or use_color is False
if not use_color:
args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE
for arg in args]
kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
for key, arg in kwargs.items()}
return fmt_str.format(*args, **kwargs)
def find_longest_name(benchmark_list):
"""
Return the length of the longest benchmark name in a given list of
benchmark JSON objects
"""
longest_name = 1
for bc in benchmark_list:
if len(bc['name']) > longest_name:
longest_name = len(bc['name'])
return longest_name
def calculate_change(old_val, new_val):
"""
Return a float representing the decimal change between old_val and new_val.
"""
return float(new_val - old_val) / abs(old_val)
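# Worked example (values taken from the test1_run1.json / test1_run2.json
# inputs added in this patch): calculate_change(50, 25) == -0.50 for
# BM_2xFaster and calculate_change(100, 110) == +0.10 for BM_10PercentSlower,
# which are exactly the deltas compare_bench.py reports.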
def generate_difference_report(json1, json2, use_color=True):
"""
Calculate and report the difference between each test of two benchmark
runs specified as 'json1' and 'json2'.
"""
first_col_width = find_longest_name(json1['benchmarks']) + 5
def find_test(name):
for b in json2['benchmarks']:
if b['name'] == name:
return b
return None
first_line = "{:<{}s} Time CPU".format(
'Benchmark', first_col_width)
output_strs = [first_line, '-' * len(first_line)]
for bn in json1['benchmarks']:
other_bench = find_test(bn['name'])
if not other_bench:
continue
def get_color(res):
if res > 0.05:
return BC_FAIL
elif res > -0.07:
return BC_WHITE
else:
return BC_CYAN
fmt_str = "{}{:<{}s}{endc} {}{:+.2f}{endc} {}{:+.2f}{endc}"
tres = calculate_change(bn['real_time'], other_bench['real_time'])
cpures = calculate_change(bn['cpu_time'], other_bench['cpu_time'])
output_strs += [color_format(use_color, fmt_str,
BC_HEADER, bn['name'], first_col_width,
get_color(tres), tres, get_color(cpures), cpures,
endc=BC_ENDC)]
return output_strs
###############################################################################
# Unit tests
import unittest
class TestReportDifference(unittest.TestCase):
def load_results(self):
import json
testInputs = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'Inputs')
testOutput1 = os.path.join(testInputs, 'test1_run1.json')
testOutput2 = os.path.join(testInputs, 'test1_run2.json')
with open(testOutput1, 'r') as f:
json1 = json.load(f)
with open(testOutput2, 'r') as f:
json2 = json.load(f)
return json1, json2
def test_basic(self):
expect_lines = [
['BM_SameTimes', '+0.00', '+0.00'],
['BM_2xFaster', '-0.50', '-0.50'],
['BM_2xSlower', '+1.00', '+1.00'],
['BM_10PercentFaster', '-0.10', '-0.10'],
['BM_10PercentSlower', '+0.10', '+0.10']
]
json1, json2 = self.load_results()
output_lines = generate_difference_report(json1, json2, use_color=False)
print(output_lines)
self.assertEqual(len(output_lines), len(expect_lines))
for i in xrange(0, len(output_lines)):
parts = [x for x in output_lines[i].split(' ') if x]
self.assertEqual(len(parts), 3)
self.assertEqual(parts, expect_lines[i])
if __name__ == '__main__':
unittest.main()
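Apart from the embedded unit test above, the report module can also be driven directly; a minimal sketch, assuming it is run from the directory containing the gbench package so that the package import and the Inputs/ paths (which sit next to report.py, as in the test above) resolve:

import json
from gbench.report import generate_difference_report

# Load the two sample runs shipped with this patch and print their deltas.
with open('gbench/Inputs/test1_run1.json') as f1, \
     open('gbench/Inputs/test1_run2.json') as f2:
    for line in generate_difference_report(json.load(f1), json.load(f2),
                                            use_color=False):
        print(line)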

View File

@@ -0,0 +1,130 @@
"""util.py - General utilities for running, loading, and processing benchmarks
"""
import json
import os
import tempfile
import subprocess
import sys
# Input file type enumeration
IT_Invalid = 0
IT_JSON = 1
IT_Executable = 2
_num_magic_bytes = 2 if sys.platform.startswith('win') else 4
def is_executable_file(filename):
"""
Return 'True' if 'filename' names a valid file which is likely
an executable. A file is considered an executable if it starts with the
magic bytes for an EXE, Mach-O, or ELF file.
"""
if not os.path.isfile(filename):
return False
with open(filename, 'rb') as f:
magic_bytes = f.read(_num_magic_bytes)
if sys.platform == 'darwin':
return magic_bytes in [
'\xfe\xed\xfa\xce', # MH_MAGIC
'\xce\xfa\xed\xfe', # MH_CIGAM
'\xfe\xed\xfa\xcf', # MH_MAGIC_64
'\xcf\xfa\xed\xfe', # MH_CIGAM_64
'\xca\xfe\xba\xbe', # FAT_MAGIC
'\xbe\xba\xfe\xca' # FAT_CIGAM
]
elif sys.platform.startswith('win'):
return magic_bytes == 'MZ'
else:
return magic_bytes == '\x7FELF'
def is_json_file(filename):
"""
Returns 'True' if 'filename' names a valid JSON output file.
'False' otherwise.
"""
try:
with open(filename, 'r') as f:
json.load(f)
return True
except:
pass
return False
def classify_input_file(filename):
"""
Return a tuple (type, msg) where 'type' specifies the classified type
of 'filename'. If 'type' is 'IT_Invalid' then 'msg' is a human-readable
string representing the error.
"""
ftype = IT_Invalid
err_msg = None
if not os.path.exists(filename):
err_msg = "'%s' does not exist" % filename
elif not os.path.isfile(filename):
err_msg = "'%s' does not name a file" % filename
elif is_executable_file(filename):
ftype = IT_Executable
elif is_json_file(filename):
ftype = IT_JSON
else:
err_msg = "'%s' does not name a valid benchmark executable or JSON file" % filename
return ftype, err_msg
def check_input_file(filename):
"""
Classify the file named by 'filename' and return the classification.
If the file is classified as 'IT_Invalid' print an error message and exit
the program.
"""
ftype, msg = classify_input_file(filename)
if ftype == IT_Invalid:
print("Invalid input file: %s" % msg)
sys.exit(1)
return ftype
def load_benchmark_results(fname):
"""
Read benchmark output from a file and return the JSON object.
REQUIRES: 'fname' names a file containing JSON benchmark output.
"""
with open(fname, 'r') as f:
return json.load(f)
def run_benchmark(exe_name, benchmark_flags):
"""
Run a benchmark specified by 'exe_name' with the specified
'benchmark_flags'. The benchmark is run directly as a subprocess to preserve
real time console output.
RETURNS: A JSON object representing the benchmark output
"""
thandle, tname = tempfile.mkstemp()
os.close(thandle)
cmd = [exe_name] + benchmark_flags
print("RUNNING: %s" % ' '.join(cmd))
exitCode = subprocess.call(cmd + ['--benchmark_out=%s' % tname])
if exitCode != 0:
print('TEST FAILED...')
sys.exit(exitCode)
json_res = load_benchmark_results(tname)
os.unlink(tname)
return json_res
def run_or_load_benchmark(filename, benchmark_flags):
"""
Get the results for a specified benchmark. If 'filename' specifies
an executable benchmark then the results are generated by running the
benchmark. Otherwise 'filename' must name a valid JSON output file,
which is loaded and the result returned.
"""
ftype = check_input_file(filename)
if ftype == IT_JSON:
return load_benchmark_results(filename)
elif ftype == IT_Executable:
return run_benchmark(filename, benchmark_flags)
else:
assert False # This branch is unreachable
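Taken together, a minimal sketch of using these helpers outside of compare_bench.py (the file names and the filter expression below are purely illustrative):

from gbench import report, util

# Either input may be a benchmark executable or a previously saved
# --benchmark_out JSON file; run_or_load_benchmark dispatches on the
# classified input type.
baseline = util.run_or_load_benchmark('baseline_results.json', [])
contender = util.run_or_load_benchmark('./mybench.out',
                                       ['--benchmark_filter=BM_Map.*'])
for line in report.generate_difference_report(baseline, contender,
                                               use_color=False):
    print(line)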