From d87eb99b8071fbd5af4d6d04ac864ada752e24bc Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Sat, 5 Nov 2016 00:30:27 +0000 Subject: [PATCH] Upgrade in-tree google benchmark to v1.1 git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@286029 91177308-0d34-0410-b5e6-96231b3b80d8 --- utils/google-benchmark/AUTHORS | 1 + utils/google-benchmark/CMakeLists.txt | 17 +- utils/google-benchmark/CONTRIBUTORS | 1 + utils/google-benchmark/README.md | 28 + .../cmake/AddCXXCompilerFlag.cmake | 2 +- .../include/benchmark/benchmark.h | 4 +- .../include/benchmark/benchmark_api.h | 403 +++--- .../include/benchmark/macros.h | 56 +- .../include/benchmark/reporter.h | 48 +- utils/google-benchmark/src/CMakeLists.txt | 23 +- utils/google-benchmark/src/arraysize.h | 7 +- utils/google-benchmark/src/benchmark.cc | 1158 +++++------------ .../src/benchmark_api_internal.h | 47 + .../src/benchmark_register.cc | 439 +++++++ utils/google-benchmark/src/check.h | 45 +- utils/google-benchmark/src/colorprint.cc | 52 +- utils/google-benchmark/src/colorprint.h | 9 +- .../google-benchmark/src/commandlineflags.cc | 32 +- utils/google-benchmark/src/commandlineflags.h | 7 +- utils/google-benchmark/src/complexity.cc | 5 +- utils/google-benchmark/src/complexity.h | 9 +- .../google-benchmark/src/console_reporter.cc | 53 +- utils/google-benchmark/src/csv_reporter.cc | 28 +- utils/google-benchmark/src/internal_macros.h | 56 +- utils/google-benchmark/src/json_reporter.cc | 69 +- utils/google-benchmark/src/log.cc | 40 - utils/google-benchmark/src/log.h | 65 +- utils/google-benchmark/src/mutex.h | 127 +- utils/google-benchmark/src/re.h | 72 +- utils/google-benchmark/src/re_posix.cc | 59 - utils/google-benchmark/src/re_std.cc | 44 - utils/google-benchmark/src/reporter.cc | 31 +- utils/google-benchmark/src/stat.h | 3 +- utils/google-benchmark/src/string_util.cc | 27 +- utils/google-benchmark/src/string_util.h | 22 +- utils/google-benchmark/src/sysinfo.cc | 182 +-- utils/google-benchmark/src/sysinfo.h | 2 - utils/google-benchmark/src/timers.cc | 195 +++ utils/google-benchmark/src/timers.h | 48 + utils/google-benchmark/src/walltime.cc | 263 ---- utils/google-benchmark/src/walltime.h | 17 - utils/google-benchmark/test/CMakeLists.txt | 10 +- utils/google-benchmark/test/basic_test.cc | 19 +- utils/google-benchmark/test/benchmark_test.cc | 93 +- .../google-benchmark/test/complexity_test.cc | 158 ++- utils/google-benchmark/test/cxx03_test.cc | 22 +- .../google-benchmark/test/diagnostics_test.cc | 17 +- .../test/donotoptimize_test.cc | 7 +- utils/google-benchmark/test/filter_test.cc | 35 +- utils/google-benchmark/test/fixture_test.cc | 7 +- utils/google-benchmark/test/map_test.cc | 6 +- .../test/multiple_ranges_test.cc | 45 +- utils/google-benchmark/test/options_test.cc | 3 +- utils/google-benchmark/test/output_test.h | 29 +- .../test/output_test_helper.cc | 252 ++-- .../test/register_benchmark_test.cc | 55 +- .../test/reporter_output_test.cc | 300 +++-- .../test/skip_with_error_test.cc | 69 +- utils/google-benchmark/tools/gbench/report.py | 9 +- 59 files changed, 2445 insertions(+), 2487 deletions(-) create mode 100644 utils/google-benchmark/src/benchmark_api_internal.h create mode 100644 utils/google-benchmark/src/benchmark_register.cc delete mode 100644 utils/google-benchmark/src/log.cc delete mode 100644 utils/google-benchmark/src/re_posix.cc delete mode 100644 utils/google-benchmark/src/re_std.cc create mode 100644 utils/google-benchmark/src/timers.cc create mode 100644 utils/google-benchmark/src/timers.h delete mode 100644 
utils/google-benchmark/src/walltime.cc delete mode 100644 utils/google-benchmark/src/walltime.h diff --git a/utils/google-benchmark/AUTHORS b/utils/google-benchmark/AUTHORS index 0e409220d..5a545fa5b 100644 --- a/utils/google-benchmark/AUTHORS +++ b/utils/google-benchmark/AUTHORS @@ -24,6 +24,7 @@ Jussi Knuuttila Kaito Udagawa Lei Xu Matt Clarkson +Nick Hutchinson Oleksandr Sochka Paul Redmond Radoslav Yovchev diff --git a/utils/google-benchmark/CMakeLists.txt b/utils/google-benchmark/CMakeLists.txt index 1ef830076..8bfd21b19 100644 --- a/utils/google-benchmark/CMakeLists.txt +++ b/utils/google-benchmark/CMakeLists.txt @@ -87,11 +87,18 @@ else() add_cxx_compiler_flag(-Wstrict-aliasing) endif() add_cxx_compiler_flag(-Wthread-safety) - if (HAVE_WTHREAD_SAFETY) - add_definitions(-DHAVE_WTHREAD_SAFETY) + if (HAVE_CXX_FLAG_WTHREAD_SAFETY) cxx_feature_check(THREAD_SAFETY_ATTRIBUTES) endif() + # On most UNIX like platforms g++ and clang++ define _GNU_SOURCE as a + # predefined macro, which turns on all of the wonderful libc extensions. + # However g++ doesn't do this in Cygwin so we have to define it ourselves + # since we depend on GNU/POSIX/BSD extensions. + if (CYGWIN) + add_definitions(-D_GNU_SOURCE=1) + endif() + # Link time optimisation if (BENCHMARK_ENABLE_LTO) add_cxx_compiler_flag(-flto) @@ -148,11 +155,15 @@ if (BENCHMARK_USE_LIBCXX) endif(BENCHMARK_USE_LIBCXX) # C++ feature checks +# Determine the correct regular expression engine to use cxx_feature_check(STD_REGEX) cxx_feature_check(GNU_POSIX_REGEX) cxx_feature_check(POSIX_REGEX) -cxx_feature_check(STEADY_CLOCK) +if(NOT HAVE_STD_REGEX AND NOT HAVE_GNU_POSIX_REGEX AND NOT HAVE_POSIX_REGEX) + message(FATAL_ERROR "Failed to determine the source files for the regular expression backend") +endif() +cxx_feature_check(STEADY_CLOCK) # Ensure we have pthreads find_package(Threads REQUIRED) diff --git a/utils/google-benchmark/CONTRIBUTORS b/utils/google-benchmark/CONTRIBUTORS index 4bff126f7..33cd941ff 100644 --- a/utils/google-benchmark/CONTRIBUTORS +++ b/utils/google-benchmark/CONTRIBUTORS @@ -40,6 +40,7 @@ Kaito Udagawa Kai Wolf Lei Xu Matt Clarkson +Nick Hutchinson Oleksandr Sochka Pascal Leroy Paul Redmond diff --git a/utils/google-benchmark/README.md b/utils/google-benchmark/README.md index 0c081ff8e..910943098 100644 --- a/utils/google-benchmark/README.md +++ b/utils/google-benchmark/README.md @@ -9,6 +9,8 @@ Discussion group: https://groups.google.com/d/forum/benchmark-discuss IRC channel: https://freenode.net #googlebenchmark +[Known issues and common problems](#known-issues) + ## Example usage ### Basic usage Define a function that executes the code to be measured. @@ -460,6 +462,24 @@ static void BM_test(benchmark::State& state) { } ``` +## Running a subset of the benchmarks + +The `--benchmark_filter=<regex>` option can be used to only run the benchmarks +which match the specified `<regex>`. For example: + +```bash +$ ./run_benchmarks.x --benchmark_filter=BM_memcpy/32 +Run on (1 X 2300 MHz CPU ) +2016-06-25 19:34:24 +Benchmark Time CPU Iterations +---------------------------------------------------- +BM_memcpy/32 11 ns 11 ns 79545455 +BM_memcpy/32k 2181 ns 2185 ns 324074 +BM_memcpy/32 12 ns 12 ns 54687500 +BM_memcpy/32k 1834 ns 1837 ns 357143 +``` + + ## Output Formats The library supports multiple output formats. Use the `--benchmark_format=<console|json|csv>` flag to set the format type. `console` @@ -567,3 +587,11 @@ Anything older *may* work. Note: Using the library and its headers in C++03 is supported. C++11 is only required to build the library.
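For reference, a benchmark along the following lines would produce the `BM_memcpy` rows shown above. This is a minimal sketch rather than part of the patch; the buffer sizes mirror the sample output. Note that the filter is a regular expression, which is why `--benchmark_filter=BM_memcpy/32` matches `BM_memcpy/32k` as well as `BM_memcpy/32`.

```c++
#include <benchmark/benchmark.h>
#include <cstdint>
#include <cstring>

// Minimal sketch of a benchmark producing the filtered output shown above.
static void BM_memcpy(benchmark::State& state) {
  char* src = new char[state.range(0)];
  char* dst = new char[state.range(0)];
  std::memset(src, 'x', state.range(0));
  while (state.KeepRunning())
    std::memcpy(dst, src, state.range(0));
  // Lets the reporter derive and print a bytes/second figure.
  state.SetBytesProcessed(int64_t(state.iterations()) *
                          int64_t(state.range(0)));
  delete[] src;
  delete[] dst;
}
BENCHMARK(BM_memcpy)->Arg(32)->Arg(32 << 10);

BENCHMARK_MAIN();
```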
+ +# Known Issues + +### Windows + +* Users must manually link `shlwapi.lib`. Failure to do so may result +in unresolved symbols. + diff --git a/utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake b/utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake index 870f11ae4..9afde84be 100644 --- a/utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake +++ b/utils/google-benchmark/cmake/AddCXXCompilerFlag.cmake @@ -25,7 +25,7 @@ function(add_cxx_compiler_flag FLAG) string(REGEX REPLACE "[^A-Za-z_0-9]" "_" SANITIZED_FLAG ${SANITIZED_FLAG}) string(REGEX REPLACE "_+" "_" SANITIZED_FLAG ${SANITIZED_FLAG}) set(CMAKE_REQUIRED_FLAGS "${FLAG}") - check_cxx_compiler_flag("" ${SANITIZED_FLAG}) + check_cxx_compiler_flag("${FLAG}" ${SANITIZED_FLAG}) if(${SANITIZED_FLAG}) set(VARIANT ${ARGV1}) if(ARGV1) diff --git a/utils/google-benchmark/include/benchmark/benchmark.h b/utils/google-benchmark/include/benchmark/benchmark.h index 18aa9e634..b3b0a8e94 100644 --- a/utils/google-benchmark/include/benchmark/benchmark.h +++ b/utils/google-benchmark/include/benchmark/benchmark.h @@ -14,8 +14,8 @@ #ifndef BENCHMARK_BENCHMARK_H_ #define BENCHMARK_BENCHMARK_H_ -#include "macros.h" #include "benchmark_api.h" +#include "macros.h" #include "reporter.h" -#endif // BENCHMARK_BENCHMARK_H_ +#endif // BENCHMARK_BENCHMARK_H_ diff --git a/utils/google-benchmark/include/benchmark/benchmark_api.h b/utils/google-benchmark/include/benchmark/benchmark_api.h index 592ed6372..28baa587a 100644 --- a/utils/google-benchmark/include/benchmark/benchmark_api.h +++ b/utils/google-benchmark/include/benchmark/benchmark_api.h @@ -153,6 +153,7 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); #include #include +#include #include #include "macros.h" @@ -173,7 +174,8 @@ void Initialize(int* argc, char** argv); // report the results. // // The second and third overload use the specified 'console_reporter' and -// 'file_reporter' respectively. 'file_reporter' will write to the file specified +// 'file_reporter' respectively. 'file_reporter' will write to the file +// specified // by '--benchmark_output'. If '--benchmark_output' is not given the // 'file_reporter' is ignored. // @@ -183,7 +185,6 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter); size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter, BenchmarkReporter* file_reporter); - // If this routine is called, peak memory allocation past this point in the // benchmark is reported at the end of the benchmark report line. (It is // computed by running the benchmark once with a single iteration and a memory @@ -196,8 +197,9 @@ class Benchmark; class BenchmarkImp; class BenchmarkFamilies; -template struct Voider { - typedef void type; +template +struct Voider { + typedef void type; }; template @@ -205,7 +207,7 @@ struct EnableIfString {}; template struct EnableIfString::type> { - typedef int type; + typedef int type; }; void UseCharPointer(char const volatile*); @@ -218,8 +220,7 @@ Benchmark* RegisterBenchmarkInternal(Benchmark*); int InitializeStreams(); BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams(); -} // end namespace internal - +} // end namespace internal // The DoNotOptimize(...) function can be used to prevent a value or // expression from being optimized away by the compiler. 
This function is @@ -228,73 +229,64 @@ BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams(); #if defined(__GNUC__) template inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { - asm volatile("" : : "g"(value) : "memory"); + asm volatile("" : : "g"(value) : "memory"); } // Force the compiler to flush pending writes to global memory. Acts as an // effective read/write barrier inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { - asm volatile("" : : : "memory"); + asm volatile("" : : : "memory"); } #else template inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { - internal::UseCharPointer(&reinterpret_cast(value)); + internal::UseCharPointer(&reinterpret_cast(value)); } // FIXME Add ClobberMemory() for non-gnu compilers #endif // TimeUnit is passed to a benchmark in order to specify the order of magnitude // for the measured time. -enum TimeUnit { - kNanosecond, - kMicrosecond, - kMillisecond -}; +enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond }; -// BigO is passed to a benchmark in order to specify the asymptotic computational -// complexity for the benchmark. In case oAuto is selected, complexity will be +// BigO is passed to a benchmark in order to specify the asymptotic +// computational +// complexity for the benchmark. In case oAuto is selected, complexity will be // calculated automatically to the best fit. -enum BigO { - oNone, - o1, - oN, - oNSquared, - oNCubed, - oLogN, - oNLogN, - oAuto, - oLambda -}; +enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda }; -// BigOFunc is passed to a benchmark in order to specify the asymptotic +// BigOFunc is passed to a benchmark in order to specify the asymptotic // computational complexity for the benchmark. typedef double(BigOFunc)(int); +namespace internal { +class ThreadTimer; +class ThreadManager; + +#if defined(BENCHMARK_HAS_CXX11) +enum ReportMode : unsigned { +#else +enum ReportMode { +#endif + RM_Unspecified, // The mode has not been manually specified + RM_Default, // The mode is user-specified as default. + RM_ReportAggregatesOnly +}; +} + // State is passed to a running Benchmark and contains state for the // benchmark to use. class State { -public: - State(size_t max_iters, const std::vector& ranges, - int thread_i, int n_threads); - + public: // Returns true if the benchmark should continue through another iteration. // NOTE: A benchmark may not return from the test until KeepRunning() has // returned false. bool KeepRunning() { if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) { - assert(!finished_); - started_ = true; - ResumeTiming(); + StartKeepRunning(); } bool const res = total_iterations_++ < max_iterations; if (BENCHMARK_BUILTIN_EXPECT(!res, false)) { - assert(started_ && (!finished_ || error_occurred_)); - if (!error_occurred_) { - PauseTiming(); - } - // Total iterations now is one greater than max iterations. Fix this. - total_iterations_ = max_iterations; - finished_ = true; + FinishKeepRunning(); } return res; } @@ -304,10 +296,11 @@ public: // Stop the benchmark timer. If not called, the timer will be // automatically stopped after KeepRunning() returns false for the first time. // - // For threaded benchmarks the PauseTiming() function acts - // like a barrier. I.e., the ith call by a particular thread to this - // function will block until all active threads have made their ith call. - // The timer will stop when the last thread has called this function. 
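As a usage sketch (the vector benchmark is illustrative; `DoNotOptimize()` and `ClobberMemory()` are the functions declared above), the two are typically paired inside the timing loop so the compiler can neither elide the measured work nor sink its stores out of the loop:

```c++
#include <benchmark/benchmark.h>
#include <vector>

static void BM_vector_push_back(benchmark::State& state) {
  while (state.KeepRunning()) {
    std::vector<int> v;
    v.reserve(1);
    // Keep the allocation observable so it cannot be optimized away.
    benchmark::DoNotOptimize(v.data());
    v.push_back(42);
    // Force the pending store to be treated as a visible side effect.
    benchmark::ClobberMemory();
  }
}
BENCHMARK(BM_vector_push_back);
```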
+ // For threaded benchmarks the PauseTiming() function only pauses the timing + // for the current thread. + // + // NOTE: The "real time" measurement is per-thread. If different threads + // report different measurements the largest one is reported. // // NOTE: PauseTiming()/ResumeTiming() are relatively // heavyweight, and so their use should generally be avoided @@ -319,11 +312,6 @@ public: // Start the benchmark timer. The timer is NOT running on entrance to the // benchmark function. It begins running after the first call to KeepRunning() // - // For threaded benchmarks the ResumeTiming() function acts - // like a barrier. I.e., the ith call by a particular thread to this - // function will block until all active threads have made their ith call. - // The timer will start when the last thread has called this function. - // // NOTE: PauseTiming()/ResumeTiming() are relatively // heavyweight, and so their use should generally be avoided // within each benchmark iteration, if possible. @@ -335,10 +323,10 @@ public: // thread and report an error with the specified 'msg'. After this call // the user may explicitly 'return' from the benchmark. // - // For threaded benchmarks only the current thread stops executing. If - // multiple threads report an error only the first error message is used. - // The current thread is no longer considered 'active' by - // 'PauseTiming()' and 'ResumingTiming()'. + // For threaded benchmarks only the current thread stops executing and future + // calls to `KeepRunning()` will block until all threads have completed + // the `KeepRunning()` loop. If multiple threads report an error only the + // first error message is used. // // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit // the current scope immediately. If the function is called from within @@ -351,10 +339,8 @@ public: // is used instead of automatically measured time if UseManualTime() was // specified. // - // For threaded benchmarks the SetIterationTime() function acts - // like a barrier. I.e., the ith call by a particular thread to this - // function will block until all threads have made their ith call. - // The time will be set by the last thread to call this function. + // For threaded benchmarks the final value will be set to the largest + // reported values. void SetIterationTime(double seconds); // Set the number of bytes processed by the current benchmark @@ -365,27 +351,21 @@ public: // // REQUIRES: a benchmark has exited its KeepRunning loop. BENCHMARK_ALWAYS_INLINE - void SetBytesProcessed(size_t bytes) { - bytes_processed_ = bytes; - } + void SetBytesProcessed(size_t bytes) { bytes_processed_ = bytes; } BENCHMARK_ALWAYS_INLINE - size_t bytes_processed() const { - return bytes_processed_; - } + size_t bytes_processed() const { return bytes_processed_; } - // If this routine is called with complexity_n > 0 and complexity report is requested for the - // family benchmark, then current benchmark will be part of the computation and complexity_n will + // If this routine is called with complexity_n > 0 and complexity report is + // requested for the + // family benchmark, then current benchmark will be part of the computation + // and complexity_n will // represent the length of N. 
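A sketch tying these calls together; the input path is hypothetical. `PauseTiming()`/`ResumeTiming()` exclude per-iteration setup from the measurement, and `SkipWithError()` followed by an explicit `return` aborts cleanly when setup fails:

```c++
#include <benchmark/benchmark.h>
#include <cstdio>

static void BM_read_block(benchmark::State& state) {
  // Hypothetical input file, for illustration only.
  std::FILE* f = std::fopen("/tmp/bench_input.bin", "rb");
  if (f == nullptr) {
    state.SkipWithError("failed to open input file");
    return;  // an explicit return after SkipWithError is permitted
  }
  char buf[4096];
  while (state.KeepRunning()) {
    state.PauseTiming();  // pauses only this thread's timer, per the note above
    std::rewind(f);       // untimed per-iteration setup
    state.ResumeTiming();
    benchmark::DoNotOptimize(std::fread(buf, 1, sizeof(buf), f));
  }
  std::fclose(f);
}
BENCHMARK(BM_read_block);
```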
BENCHMARK_ALWAYS_INLINE - void SetComplexityN(int complexity_n) { - complexity_n_ = complexity_n; - } + void SetComplexityN(int complexity_n) { complexity_n_ = complexity_n; } BENCHMARK_ALWAYS_INLINE - int complexity_length_n() { - return complexity_n_; - } + int complexity_length_n() { return complexity_n_; } // If this routine is called with items > 0, then an items/s // label is printed on the benchmark report line for the currently @@ -394,14 +374,10 @@ public: // // REQUIRES: a benchmark has exited its KeepRunning loop. BENCHMARK_ALWAYS_INLINE - void SetItemsProcessed(size_t items) { - items_processed_ = items; - } + void SetItemsProcessed(size_t items) { items_processed_ = items; } BENCHMARK_ALWAYS_INLINE - size_t items_processed() const { - return items_processed_; - } + size_t items_processed() const { return items_processed_; } // If this routine is called, the specified label is printed at the // end of the benchmark report line for the currently executing @@ -422,7 +398,7 @@ public: // has the nested typename `basic_string`. This typename should be provided // as an injected class name in the case of std::string. template - void SetLabel(StringType const & str, + void SetLabel(StringType const& str, typename internal::EnableIfString::type = 1) { this->SetLabel(str.c_str()); } @@ -430,8 +406,8 @@ public: // Range arguments for this run. CHECKs if the argument has been set. BENCHMARK_ALWAYS_INLINE int range(std::size_t pos = 0) const { - assert(range_.size() > pos); - return range_[pos]; + assert(range_.size() > pos); + return range_[pos]; } BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead") @@ -443,7 +419,7 @@ public: BENCHMARK_ALWAYS_INLINE size_t iterations() const { return total_iterations_; } -private: + private: bool started_; bool finished_; size_t total_iterations_; @@ -455,17 +431,25 @@ private: int complexity_n_; -public: - // FIXME: Make this private somehow. bool error_occurred_; -public: + + public: // Index of the executing thread. Values from [0, threads). const int thread_index; // Number of threads concurrently executing the benchmark. const int threads; const size_t max_iterations; -private: + // TODO make me private + State(size_t max_iters, const std::vector& ranges, int thread_i, + int n_threads, internal::ThreadTimer* timer, + internal::ThreadManager* manager); + + private: + void StartKeepRunning(); + void FinishKeepRunning(); + internal::ThreadTimer* timer_; + internal::ThreadManager* manager_; BENCHMARK_DISALLOW_COPY_AND_ASSIGN(State); }; @@ -480,7 +464,7 @@ typedef void(Function)(State&); // Each method returns "this" so that multiple method calls can // chained into one expression. class Benchmark { -public: + public: virtual ~Benchmark(); // Note: the following methods all return "this" so that multiple @@ -499,7 +483,8 @@ public: // REQUIRES: The function passed to the constructor must accept an arg1. Benchmark* Range(int start, int limit); - // Run this benchmark once for all values in the range [start..limit] with specific step + // Run this benchmark once for all values in the range [start..limit] with + // specific step // REQUIRES: The function passed to the constructor must accept an arg1. Benchmark* DenseRange(int start, int limit, int step = 1); @@ -512,10 +497,10 @@ public: // NOTE: This is a legacy C++03 interface provided for compatibility only. // New code should use 'Args'. 
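The `State` counters above compose as follows. `CountSpaces` is a hypothetical function under test; all of the setters run only after the `KeepRunning()` loop has exited, as their contracts require:

```c++
#include <benchmark/benchmark.h>
#include <cstddef>
#include <string>

// Hypothetical function under test.
static std::size_t CountSpaces(const std::string& s) {
  std::size_t n = 0;
  for (std::size_t i = 0; i < s.size(); ++i) n += (s[i] == ' ');
  return n;
}

static void BM_count_spaces(benchmark::State& state) {
  std::string input(static_cast<std::size_t>(state.range(0)), ' ');
  while (state.KeepRunning()) {
    benchmark::DoNotOptimize(CountSpaces(input));
  }
  // All of the following require that the KeepRunning() loop has exited.
  state.SetBytesProcessed(state.iterations() * state.range(0));
  state.SetItemsProcessed(state.iterations());
  state.SetComplexityN(state.range(0));
  state.SetLabel("ascii");
}
BENCHMARK(BM_count_spaces)->Range(8, 8 << 10)->Complexity(benchmark::oN);
```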
Benchmark* ArgPair(int x, int y) { - std::vector args; - args.push_back(x); - args.push_back(y); - return Args(args); + std::vector args; + args.push_back(x); + args.push_back(y); + return Args(args); } // Run this benchmark once for a number of values picked from the @@ -523,14 +508,21 @@ public: // REQUIRES: The function passed to the constructor must accept arg1, arg2 ... Benchmark* Ranges(const std::vector >& ranges); + // Equivalent to ArgNames({name}) + Benchmark* ArgName(const std::string& name); + + // Set the argument names to display in the benchmark name. If not called, + // only argument values will be shown. + Benchmark* ArgNames(const std::vector& names); + // Equivalent to Ranges({{lo1, hi1}, {lo2, hi2}}). // NOTE: This is a legacy C++03 interface provided for compatibility only. // New code should use 'Ranges'. Benchmark* RangePair(int lo1, int hi1, int lo2, int hi2) { - std::vector > ranges; - ranges.push_back(std::make_pair(lo1, hi1)); - ranges.push_back(std::make_pair(lo2, hi2)); - return Ranges(ranges); + std::vector > ranges; + ranges.push_back(std::make_pair(lo1, hi1)); + ranges.push_back(std::make_pair(lo2, hi2)); + return Ranges(ranges); } // Pass this benchmark object to *func, which can customize @@ -538,8 +530,8 @@ public: // Threads, etc. Benchmark* Apply(void (*func)(Benchmark* benchmark)); - // Set the range multiplier for non-dense range. If not called, the range multiplier - // kRangeMultiplier will be used. + // Set the range multiplier for non-dense range. If not called, the range + // multiplier kRangeMultiplier will be used. Benchmark* RangeMultiplier(int multiplier); // Set the minimum amount of time to use when running this benchmark. This @@ -564,15 +556,17 @@ public: // called, the cpu time used by the benchmark will be used. Benchmark* UseRealTime(); - // If a benchmark must measure time manually (e.g. if GPU execution time is being - // measured), call this method. If called, each benchmark iteration should call + // If a benchmark must measure time manually (e.g. if GPU execution time is + // being + // measured), call this method. If called, each benchmark iteration should + // call // SetIterationTime(seconds) to report the measured time, which will be used // to control how many iterations are run, and in the printing of items/second // or MB/second values. Benchmark* UseManualTime(); // Set the asymptotic computational complexity for the benchmark. If called - // the asymptotic computational complexity will be shown on the output. + // the asymptotic computational complexity will be shown on the output. Benchmark* Complexity(BigO complexity = benchmark::oAuto); // Set the asymptotic computational complexity for the benchmark. If called @@ -600,6 +594,12 @@ public: // Foo in 16 threads Benchmark* ThreadRange(int min_threads, int max_threads); + // For each value n in the range, run this benchmark once using n threads. + // min_threads and max_threads are always included in the range. + // stride specifies the increment. E.g. DenseThreadRange(1, 8, 3) starts + // a benchmark with 1, 4, 7 and 8 threads. 
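Putting the argument helpers together (the values are illustrative, and `ArgNames` is new in this version): each `Args`/`Ranges` entry supplies one argument tuple per run, and named arguments render as `name:value` segments, so the runs below should report as something like `BM_set_insert/size:1024/batch:10/threads:2`:

```c++
#include <benchmark/benchmark.h>
#include <set>

static void BM_set_insert(benchmark::State& state) {
  while (state.KeepRunning()) {
    std::set<int> s;
    for (int i = 0; i < state.range(1); ++i) s.insert(i % state.range(0));
  }
}

BENCHMARK(BM_set_insert)
    ->ArgNames({"size", "batch"})  // labels for range(0) and range(1)
    ->RangeMultiplier(4)
    ->Ranges({{1 << 10, 16 << 10}, {10, 40}})
    ->ThreadRange(1, 4);
```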
+ Benchmark* DenseThreadRange(int min_threads, int max_threads, int stride = 1); + // Equivalent to ThreadRange(NumCPUs(), NumCPUs()) Benchmark* ThreadPerCpu(); @@ -608,25 +608,43 @@ public: // Used inside the benchmark implementation struct Instance; -protected: + protected: explicit Benchmark(const char* name); Benchmark(Benchmark const&); void SetName(const char* name); -private: + int ArgsCnt() const; + + static void AddRange(std::vector* dst, int lo, int hi, int mult); + + private: friend class BenchmarkFamilies; - BenchmarkImp* imp_; + + std::string name_; + ReportMode report_mode_; + std::vector arg_names_; // Args for all benchmark runs + std::vector > args_; // Args for all benchmark runs + TimeUnit time_unit_; + int range_multiplier_; + double min_time_; + int repetitions_; + bool use_real_time_; + bool use_manual_time_; + BigO complexity_; + BigOFunc* complexity_lambda_; + std::vector thread_counts_; Benchmark& operator=(Benchmark const&); }; -} // namespace internal +} // namespace internal // Create and register a benchmark with the specified 'name' that invokes // the specified functor 'fn'. // // RETURNS: A pointer to the registered benchmark. -internal::Benchmark* RegisterBenchmark(const char* name, internal::Function* fn); +internal::Benchmark* RegisterBenchmark(const char* name, + internal::Function* fn); #if defined(BENCHMARK_HAS_CXX11) template @@ -637,30 +655,30 @@ namespace internal { // The class used to hold all Benchmarks created from static function. // (ie those created using the BENCHMARK(...) macros. class FunctionBenchmark : public Benchmark { -public: - FunctionBenchmark(const char* name, Function* func) - : Benchmark(name), func_(func) - {} + public: + FunctionBenchmark(const char* name, Function* func) + : Benchmark(name), func_(func) {} - virtual void Run(State& st); -private: - Function* func_; + virtual void Run(State& st); + + private: + Function* func_; }; #ifdef BENCHMARK_HAS_CXX11 template class LambdaBenchmark : public Benchmark { -public: - virtual void Run(State& st) { lambda_(st); } + public: + virtual void Run(State& st) { lambda_(st); } -private: + private: template LambdaBenchmark(const char* name, OLambda&& lam) : Benchmark(name), lambda_(std::forward(lam)) {} LambdaBenchmark(LambdaBenchmark const&) = delete; -private: + private: template friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&); @@ -670,55 +688,58 @@ private: } // end namespace internal -inline internal::Benchmark* -RegisterBenchmark(const char* name, internal::Function* fn) { - return internal::RegisterBenchmarkInternal( - ::new internal::FunctionBenchmark(name, fn)); +inline internal::Benchmark* RegisterBenchmark(const char* name, + internal::Function* fn) { + return internal::RegisterBenchmarkInternal( + ::new internal::FunctionBenchmark(name, fn)); } #ifdef BENCHMARK_HAS_CXX11 template internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) { - using BenchType = internal::LambdaBenchmark::type>; - return internal::RegisterBenchmarkInternal( - ::new BenchType(name, std::forward(fn))); + using BenchType = + internal::LambdaBenchmark::type>; + return internal::RegisterBenchmarkInternal( + ::new BenchType(name, std::forward(fn))); } #endif #if defined(BENCHMARK_HAS_CXX11) && \ - (!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409) -template + (!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409) +template internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn, Args&&... 
args) { - return benchmark::RegisterBenchmark(name, - [=](benchmark::State& st) { fn(st, args...); }); + return benchmark::RegisterBenchmark( + name, [=](benchmark::State& st) { fn(st, args...); }); } #else #define BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK #endif - // The base class for all fixture tests. -class Fixture: public internal::Benchmark { -public: - Fixture() : internal::Benchmark("") {} +class Fixture : public internal::Benchmark { + public: + Fixture() : internal::Benchmark("") {} - virtual void Run(State& st) { - this->SetUp(st); - this->BenchmarkCase(st); - this->TearDown(st); - } + virtual void Run(State& st) { + this->SetUp(st); + this->BenchmarkCase(st); + this->TearDown(st); + } - virtual void SetUp(const State&) {} - virtual void TearDown(const State&) {} + // These will be deprecated ... + virtual void SetUp(const State&) {} + virtual void TearDown(const State&) {} + // ... In favor of these. + virtual void SetUp(State& st) { SetUp(const_cast(st)); } + virtual void TearDown(State& st) { TearDown(const_cast(st)); } -protected: - virtual void BenchmarkCase(State&) = 0; + protected: + virtual void BenchmarkCase(State&) = 0; }; } // end namespace benchmark - // ------------------------------------------------------ // Macro to register benchmarks @@ -733,18 +754,18 @@ protected: // Helpers for generating unique variable names #define BENCHMARK_PRIVATE_NAME(n) \ - BENCHMARK_PRIVATE_CONCAT(_benchmark_, BENCHMARK_PRIVATE_UNIQUE_ID, n) + BENCHMARK_PRIVATE_CONCAT(_benchmark_, BENCHMARK_PRIVATE_UNIQUE_ID, n) #define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c) #define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c -#define BENCHMARK_PRIVATE_DECLARE(n) \ - static ::benchmark::internal::Benchmark* \ - BENCHMARK_PRIVATE_NAME(n) BENCHMARK_UNUSED +#define BENCHMARK_PRIVATE_DECLARE(n) \ + static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \ + BENCHMARK_UNUSED -#define BENCHMARK(n) \ - BENCHMARK_PRIVATE_DECLARE(n) = \ - (::benchmark::internal::RegisterBenchmarkInternal( \ - new ::benchmark::internal::FunctionBenchmark(#n, n))) +#define BENCHMARK(n) \ + BENCHMARK_PRIVATE_DECLARE(n) = \ + (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark(#n, n))) // Old-style macros #define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a)) @@ -767,14 +788,14 @@ protected: //} // /* Registers a benchmark named "BM_takes_args/int_string_test` */ // BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc")); -#define BENCHMARK_CAPTURE(func, test_case_name, ...) \ - BENCHMARK_PRIVATE_DECLARE(func) = \ - (::benchmark::internal::RegisterBenchmarkInternal( \ - new ::benchmark::internal::FunctionBenchmark( \ - #func "/" #test_case_name, \ - [](::benchmark::State& st) { func(st, __VA_ARGS__); }))) +#define BENCHMARK_CAPTURE(func, test_case_name, ...) \ + BENCHMARK_PRIVATE_DECLARE(func) = \ + (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark( \ + #func "/" #test_case_name, \ + [](::benchmark::State& st) { func(st, __VA_ARGS__); }))) -#endif // __cplusplus >= 11 +#endif // __cplusplus >= 11 // This will register a benchmark for a templatized function. For example: // @@ -784,54 +805,54 @@ protected: // BENCHMARK_TEMPLATE(BM_Foo, 1); // // will register BM_Foo<1> as a benchmark. 
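A sketch of the template registration (the container benchmark is illustrative): `BENCHMARK_TEMPLATE1` is the C++03 spelling for a single template argument, while the variadic `BENCHMARK_TEMPLATE` requires C++11:

```c++
#include <benchmark/benchmark.h>
#include <deque>
#include <vector>

template <class Q>
static void BM_Sequential(benchmark::State& state) {
  Q q;
  typename Q::value_type v = 42;
  while (state.KeepRunning()) {
    for (int i = state.range(0); i-- > 0;) q.push_back(v);
  }
}

// C++03 spelling, one template argument:
BENCHMARK_TEMPLATE1(BM_Sequential, std::vector<int>)->Arg(512);
// C++11 spelling, any number of template arguments:
BENCHMARK_TEMPLATE(BM_Sequential, std::deque<int>)->Arg(512);
```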
-#define BENCHMARK_TEMPLATE1(n, a) \ - BENCHMARK_PRIVATE_DECLARE(n) = \ - (::benchmark::internal::RegisterBenchmarkInternal( \ - new ::benchmark::internal::FunctionBenchmark(#n "<" #a ">", n))) - -#define BENCHMARK_TEMPLATE2(n, a, b) \ +#define BENCHMARK_TEMPLATE1(n, a) \ BENCHMARK_PRIVATE_DECLARE(n) = \ (::benchmark::internal::RegisterBenchmarkInternal( \ - new ::benchmark::internal::FunctionBenchmark( \ - #n "<" #a "," #b ">", n))) + new ::benchmark::internal::FunctionBenchmark(#n "<" #a ">", n))) + +#define BENCHMARK_TEMPLATE2(n, a, b) \ + BENCHMARK_PRIVATE_DECLARE(n) = \ + (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark(#n "<" #a "," #b ">", \ + n))) #if __cplusplus >= 201103L -#define BENCHMARK_TEMPLATE(n, ...) \ - BENCHMARK_PRIVATE_DECLARE(n) = \ +#define BENCHMARK_TEMPLATE(n, ...) \ + BENCHMARK_PRIVATE_DECLARE(n) = \ (::benchmark::internal::RegisterBenchmarkInternal( \ - new ::benchmark::internal::FunctionBenchmark( \ - #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>))) + new ::benchmark::internal::FunctionBenchmark( \ + #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>))) #else #define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a) #endif +#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ + class BaseClass##_##Method##_Benchmark : public BaseClass { \ + public: \ + BaseClass##_##Method##_Benchmark() : BaseClass() { \ + this->SetName(#BaseClass "/" #Method); \ + } \ + \ + protected: \ + virtual void BenchmarkCase(::benchmark::State&); \ + }; -#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ -class BaseClass##_##Method##_Benchmark : public BaseClass { \ -public: \ - BaseClass##_##Method##_Benchmark() : BaseClass() { \ - this->SetName(#BaseClass "/" #Method);} \ -protected: \ - virtual void BenchmarkCase(::benchmark::State&); \ -}; - -#define BENCHMARK_DEFINE_F(BaseClass, Method) \ - BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ - void BaseClass##_##Method##_Benchmark::BenchmarkCase +#define BENCHMARK_DEFINE_F(BaseClass, Method) \ + BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ + void BaseClass##_##Method##_Benchmark::BenchmarkCase #define BENCHMARK_REGISTER_F(BaseClass, Method) \ - BENCHMARK_PRIVATE_REGISTER_F(BaseClass##_##Method##_Benchmark) + BENCHMARK_PRIVATE_REGISTER_F(BaseClass##_##Method##_Benchmark) #define BENCHMARK_PRIVATE_REGISTER_F(TestName) \ - BENCHMARK_PRIVATE_DECLARE(TestName) = \ - (::benchmark::internal::RegisterBenchmarkInternal(new TestName())) + BENCHMARK_PRIVATE_DECLARE(TestName) = \ + (::benchmark::internal::RegisterBenchmarkInternal(new TestName())) // This macro will define and register a benchmark within a fixture class. 
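The fixture macros above split definition (`BENCHMARK_DEFINE_F`) from registration (`BENCHMARK_REGISTER_F`) so one benchmark body can be registered with several configurations. A sketch with a hypothetical fixture follows; `BENCHMARK_F`, defined next, fuses the two steps for the common case:

```c++
#include <benchmark/benchmark.h>
#include <map>

// Hypothetical fixture: the map is rebuilt before each benchmark run.
class MapFixture : public benchmark::Fixture {
 public:
  virtual void SetUp(const benchmark::State& st) {
    for (int i = 0; i < st.range(0); ++i) m[i] = i;
  }
  virtual void TearDown(const benchmark::State&) { m.clear(); }
  std::map<int, int> m;
};

BENCHMARK_DEFINE_F(MapFixture, Lookup)(benchmark::State& state) {
  while (state.KeepRunning()) {
    benchmark::DoNotOptimize(m.find(state.range(0) / 2));
  }
}
BENCHMARK_REGISTER_F(MapFixture, Lookup)->Range(8, 8 << 10);
```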
-#define BENCHMARK_F(BaseClass, Method) \ - BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ - BENCHMARK_REGISTER_F(BaseClass, Method); \ - void BaseClass##_##Method##_Benchmark::BenchmarkCase - +#define BENCHMARK_F(BaseClass, Method) \ + BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ + BENCHMARK_REGISTER_F(BaseClass, Method); \ + void BaseClass##_##Method##_Benchmark::BenchmarkCase // Helper macro to create a main routine in a test that runs the benchmarks #define BENCHMARK_MAIN() \ diff --git a/utils/google-benchmark/include/benchmark/macros.h b/utils/google-benchmark/include/benchmark/macros.h index 690c1fd7a..2466fd3fa 100644 --- a/utils/google-benchmark/include/benchmark/macros.h +++ b/utils/google-benchmark/include/benchmark/macros.h @@ -19,44 +19,44 @@ #endif #ifndef BENCHMARK_HAS_CXX11 -# define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \ - TypeName(const TypeName&); \ - TypeName& operator=(const TypeName&) +#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&); \ + TypeName& operator=(const TypeName&) #else -# define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \ - TypeName(const TypeName&) = delete; \ - TypeName& operator=(const TypeName&) = delete +#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&) = delete; \ + TypeName& operator=(const TypeName&) = delete #endif #if defined(__GNUC__) -# define BENCHMARK_UNUSED __attribute__((unused)) -# define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline)) -# define BENCHMARK_NOEXCEPT noexcept -# define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) +#define BENCHMARK_UNUSED __attribute__((unused)) +#define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline)) +#define BENCHMARK_NOEXCEPT noexcept +#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) #elif defined(_MSC_VER) && !defined(__clang__) -# define BENCHMARK_UNUSED -# define BENCHMARK_ALWAYS_INLINE __forceinline -# if _MSC_VER >= 1900 -# define BENCHMARK_NOEXCEPT noexcept -# define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) -# else -# define BENCHMARK_NOEXCEPT -# define BENCHMARK_NOEXCEPT_OP(x) -# endif -# define __func__ __FUNCTION__ +#define BENCHMARK_UNUSED +#define BENCHMARK_ALWAYS_INLINE __forceinline +#if _MSC_VER >= 1900 +#define BENCHMARK_NOEXCEPT noexcept +#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) #else -# define BENCHMARK_UNUSED -# define BENCHMARK_ALWAYS_INLINE -# define BENCHMARK_NOEXCEPT -# define BENCHMARK_NOEXCEPT_OP(x) +#define BENCHMARK_NOEXCEPT +#define BENCHMARK_NOEXCEPT_OP(x) +#endif +#define __func__ __FUNCTION__ +#else +#define BENCHMARK_UNUSED +#define BENCHMARK_ALWAYS_INLINE +#define BENCHMARK_NOEXCEPT +#define BENCHMARK_NOEXCEPT_OP(x) #endif #if defined(__GNUC__) -# define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y) -# define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) +#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y) +#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) #else -# define BENCHMARK_BUILTIN_EXPECT(x, y) x -# define BENCHMARK_DEPRECATED_MSG(msg) +#define BENCHMARK_BUILTIN_EXPECT(x, y) x +#define BENCHMARK_DEPRECATED_MSG(msg) #endif #if defined(__GNUC__) && !defined(__clang__) diff --git a/utils/google-benchmark/include/benchmark/reporter.h b/utils/google-benchmark/include/benchmark/reporter.h index 6d36c41b1..8c39e7f7f 100644 --- a/utils/google-benchmark/include/benchmark/reporter.h +++ b/utils/google-benchmark/include/benchmark/reporter.h @@ -41,19 +41,20 @@ class BenchmarkReporter { }; struct Run { - Run() : - error_occurred(false), - 
iterations(1), - time_unit(kNanosecond), - real_accumulated_time(0), - cpu_accumulated_time(0), - bytes_per_second(0), - items_per_second(0), - max_heapbytes_used(0), - complexity(oNone), - complexity_n(0), - report_big_o(false), - report_rms(false) {} + Run() + : error_occurred(false), + iterations(1), + time_unit(kNanosecond), + real_accumulated_time(0), + cpu_accumulated_time(0), + bytes_per_second(0), + items_per_second(0), + max_heapbytes_used(0), + complexity(oNone), + complexity_lambda(), + complexity_n(0), + report_big_o(false), + report_rms(false) {} std::string benchmark_name; std::string report_label; // Empty if not set by benchmark. @@ -133,13 +134,9 @@ class BenchmarkReporter { error_stream_ = err; } - std::ostream& GetOutputStream() const { - return *output_stream_; - } + std::ostream& GetOutputStream() const { return *output_stream_; } - std::ostream& GetErrorStream() const { - return *error_stream_; - } + std::ostream& GetErrorStream() const { return *error_stream_; } virtual ~BenchmarkReporter(); @@ -156,22 +153,19 @@ class BenchmarkReporter { // Simple reporter that outputs benchmark data to the console. This is the // default reporter used by RunSpecifiedBenchmarks(). class ConsoleReporter : public BenchmarkReporter { -public: - enum OutputOptions { - OO_None, - OO_Color - }; + public: + enum OutputOptions { OO_None, OO_Color }; explicit ConsoleReporter(OutputOptions color_output = OO_Color) - : color_output_(color_output == OO_Color) {} + : name_field_width_(0), color_output_(color_output == OO_Color) {} virtual bool ReportContext(const Context& context); virtual void ReportRuns(const std::vector& reports); -protected: + protected: virtual void PrintRunData(const Run& report); size_t name_field_width_; -private: + private: bool color_output_; }; diff --git a/utils/google-benchmark/src/CMakeLists.txt b/utils/google-benchmark/src/CMakeLists.txt index a65723b09..403887517 100644 --- a/utils/google-benchmark/src/CMakeLists.txt +++ b/utils/google-benchmark/src/CMakeLists.txt @@ -6,24 +6,13 @@ if (DEFINED BENCHMARK_CXX_LINKER_FLAGS) list(APPEND CMAKE_MODULE_LINKER_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}) endif() -# Define the source files -set(SOURCE_FILES "benchmark.cc" "colorprint.cc" "commandlineflags.cc" - "console_reporter.cc" "csv_reporter.cc" "json_reporter.cc" - "log.cc" "reporter.cc" "sleep.cc" "string_util.cc" - "sysinfo.cc" "walltime.cc" "complexity.cc") -# Determine the correct regular expression engine to use -if(HAVE_STD_REGEX) - set(RE_FILES "re_std.cc") -elseif(HAVE_GNU_POSIX_REGEX) - set(RE_FILES "re_posix.cc") -elseif(HAVE_POSIX_REGEX) - set(RE_FILES "re_posix.cc") -else() - message(FATAL_ERROR "Failed to determine the source files for the regular expression backend") -endif() - -add_library(benchmark ${SOURCE_FILES} ${RE_FILES}) +file(GLOB + SOURCE_FILES + *.cc + ${PROJECT_SOURCE_DIR}/include/benchmark/*.h + ${CMAKE_CURRENT_SOURCE_DIR}/*.h) +add_library(benchmark ${SOURCE_FILES}) set_target_properties(benchmark PROPERTIES OUTPUT_NAME "benchmark" VERSION ${GENERIC_LIB_VERSION} diff --git a/utils/google-benchmark/src/arraysize.h b/utils/google-benchmark/src/arraysize.h index 638a52a0e..51a50f2df 100644 --- a/utils/google-benchmark/src/arraysize.h +++ b/utils/google-benchmark/src/arraysize.h @@ -11,7 +11,6 @@ namespace internal { // a pointer by mistake, you will get a compile-time error. // - // This template function declaration is used in defining arraysize. // Note that the function doesn't need an implementation, as we only // use its type. 
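Given the virtual hooks declared above, output can be customized by layering on `ConsoleReporter`. A minimal sketch (the suppression rule is purely illustrative, and `BM_noop` exists only so the example runs something):

```c++
#include <benchmark/benchmark.h>

static void BM_noop(benchmark::State& state) {
  while (state.KeepRunning()) {
  }
}
BENCHMARK(BM_noop);

class QuietReporter : public benchmark::ConsoleReporter {
 public:
  QuietReporter() : ConsoleReporter(OO_None) {}  // disable color escapes

 protected:
  // Suppress failed runs; print everything else with console formatting.
  virtual void PrintRunData(const Run& run) {
    if (!run.error_occurred) ConsoleReporter::PrintRunData(run);
  }
};

int main(int argc, char** argv) {
  benchmark::Initialize(&argc, argv);
  QuietReporter reporter;
  benchmark::RunSpecifiedBenchmarks(&reporter);  // console-reporter overload
}
```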
@@ -28,7 +27,7 @@ char (&ArraySizeHelper(const T (&array)[N]))[N]; #define arraysize(array) (sizeof(::benchmark::internal::ArraySizeHelper(array))) -} // end namespace internal -} // end namespace benchmark +} // end namespace internal +} // end namespace benchmark -#endif // BENCHMARK_ARRAYSIZE_H_ +#endif // BENCHMARK_ARRAYSIZE_H_ diff --git a/utils/google-benchmark/src/benchmark.cc b/utils/google-benchmark/src/benchmark.cc index bfac28c23..95f6a25b7 100644 --- a/utils/google-benchmark/src/benchmark.cc +++ b/utils/google-benchmark/src/benchmark.cc @@ -13,26 +13,28 @@ // limitations under the License. #include "benchmark/benchmark.h" +#include "benchmark_api_internal.h" #include "internal_macros.h" #ifndef BENCHMARK_OS_WINDOWS -#include #include +#include #include #endif -#include -#include -#include #include #include #include -#include +#include +#include +#include #include +#include #include #include #include "check.h" +#include "colorprint.h" #include "commandlineflags.h" #include "complexity.h" #include "log.h" @@ -41,7 +43,7 @@ #include "stat.h" #include "string_util.h" #include "sysinfo.h" -#include "walltime.h" +#include "timers.h" DEFINE_bool(benchmark_list_tests, false, "Print a list of benchmarks. This option overrides all other " @@ -81,863 +83,258 @@ DEFINE_string(benchmark_out_format, "json", DEFINE_string(benchmark_out, "", "The file to write additonal output to"); -DEFINE_bool(color_print, true, "Enables colorized logging."); +DEFINE_string(benchmark_color, "auto", + "Whether to use colors in the output. Valid values: " + "'true'/'yes'/1, 'false'/'no'/0, and 'auto'. 'auto' means to use " + "colors if the output is being sent to a terminal and the TERM " + "environment variable is set to a terminal type that supports " + "colors."); DEFINE_int32(v, 0, "The level of verbose logging to output"); - namespace benchmark { - namespace internal { void UseCharPointer(char const volatile*) {} -// NOTE: This is a dummy "mutex" type used to denote the actual mutex -// returned by GetBenchmarkLock(). This is only used to placate the thread -// safety warnings by giving the return of GetBenchmarkLock() a name. -struct CAPABILITY("mutex") BenchmarkLockType {}; -BenchmarkLockType BenchmarkLockVar; - -} // end namespace internal - -inline Mutex& RETURN_CAPABILITY(::benchmark::internal::BenchmarkLockVar) -GetBenchmarkLock() -{ - static Mutex lock; - return lock; -} +} // end namespace internal namespace { -bool IsZero(double n) { - return std::abs(n) < std::numeric_limits::epsilon(); -} - -// For non-dense Range, intermediate values are powers of kRangeMultiplier. -static const int kRangeMultiplier = 8; -// The size of a benchmark family determines is the number of inputs to repeat -// the benchmark on. If this is "large" then warn the user during configuration. -static const size_t kMaxFamilySize = 100; static const size_t kMaxIterations = 1000000000; -bool running_benchmark = false; +} // end namespace -// Global variable so that a benchmark can cause a little extra printing -std::string* GetReportLabel() { - static std::string label GUARDED_BY(GetBenchmarkLock()); - return &label; -} +namespace internal { -// Global variable so that a benchmark can report an error as a human readable -// string. If error_message is null no error occurred. 
-#if defined(_MSC_VER) && _MSC_VER <= 1800 -typedef char* error_message_type; -#else -typedef const char* error_message_type; -#endif +class ThreadManager { + public: + ThreadManager(int num_threads) + : alive_threads_(num_threads), start_stop_barrier_(num_threads) {} -static std::atomic error_message = ATOMIC_VAR_INIT(nullptr); + Mutex& GetBenchmarkMutex() const RETURN_CAPABILITY(benchmark_mutex_) { + return benchmark_mutex_; + } -// TODO(ericwf): support MallocCounter. -//static benchmark::MallocCounter *benchmark_mc; + bool StartStopBarrier() EXCLUDES(end_cond_mutex_) { + return start_stop_barrier_.wait(); + } -struct ThreadStats { - ThreadStats() : bytes_processed(0), items_processed(0), complexity_n(0) {} - int64_t bytes_processed; - int64_t items_processed; - int complexity_n; + void NotifyThreadComplete() EXCLUDES(end_cond_mutex_) { + start_stop_barrier_.removeThread(); + if (--alive_threads_ == 0) { + MutexLock lock(end_cond_mutex_); + end_condition_.notify_all(); + } + } + + void WaitForAllThreads() EXCLUDES(end_cond_mutex_) { + MutexLock lock(end_cond_mutex_); + end_condition_.wait(lock.native_handle(), + [this]() { return alive_threads_ == 0; }); + } + + public: + struct Result { + double real_time_used = 0; + double cpu_time_used = 0; + double manual_time_used = 0; + int64_t bytes_processed = 0; + int64_t items_processed = 0; + int complexity_n = 0; + std::string report_label_; + std::string error_message_; + bool has_error_ = false; + }; + GUARDED_BY(GetBenchmarkMutex()) Result results; + + private: + mutable Mutex benchmark_mutex_; + std::atomic alive_threads_; + Barrier start_stop_barrier_; + Mutex end_cond_mutex_; + Condition end_condition_; }; // Timer management class -class TimerManager { +class ThreadTimer { public: - TimerManager(int num_threads, Notification* done) - : num_threads_(num_threads), - running_threads_(num_threads), - done_(done), - running_(false), - real_time_used_(0), - cpu_time_used_(0), - manual_time_used_(0), - num_finalized_(0), - phase_number_(0), - entered_(0) - { + ThreadTimer() = default; + + // Called by each thread + void StartTimer() { + running_ = true; + start_real_time_ = ChronoClockNow(); + start_cpu_time_ = ThreadCPUUsage(); } // Called by each thread - void StartTimer() EXCLUDES(lock_) { - bool last_thread = false; - { - MutexLock ml(lock_); - last_thread = Barrier(ml); - if (last_thread) { - CHECK(!running_) << "Called StartTimer when timer is already running"; - running_ = true; - start_real_time_ = walltime::Now(); - start_cpu_time_ = MyCPUUsage() + ChildrenCPUUsage(); - } - } - if (last_thread) { - phase_condition_.notify_all(); - } + void StopTimer() { + CHECK(running_); + running_ = false; + real_time_used_ += ChronoClockNow() - start_real_time_; + cpu_time_used_ += ThreadCPUUsage() - start_cpu_time_; } // Called by each thread - void StopTimer() EXCLUDES(lock_) { - bool last_thread = false; - { - MutexLock ml(lock_); - last_thread = Barrier(ml); - if (last_thread) { - CHECK(running_) << "Called StopTimer when timer is already stopped"; - InternalStop(); - } - } - if (last_thread) { - phase_condition_.notify_all(); - } - } + void SetIterationTime(double seconds) { manual_time_used_ += seconds; } - // Called by each thread - void SetIterationTime(double seconds) EXCLUDES(lock_) { - bool last_thread = false; - { - MutexLock ml(lock_); - last_thread = Barrier(ml); - if (last_thread) { - manual_time_used_ += seconds; - } - } - if (last_thread) { - phase_condition_.notify_all(); - } - } - - // Called by each thread - void Finalize() 
EXCLUDES(lock_) { - MutexLock l(lock_); - num_finalized_++; - if (num_finalized_ == num_threads_) { - CHECK(!running_) << - "The timer should be stopped before the timer is finalized"; - done_->Notify(); - } - } - - void RemoveErroredThread() EXCLUDES(lock_) { - MutexLock ml(lock_); - int last_thread = --running_threads_ == 0; - if (last_thread && running_) - InternalStop(); - else if (!last_thread) - phase_condition_.notify_all(); - } + bool running() const { return running_; } // REQUIRES: timer is not running - double real_time_used() EXCLUDES(lock_) { - MutexLock l(lock_); + double real_time_used() { CHECK(!running_); return real_time_used_; } // REQUIRES: timer is not running - double cpu_time_used() EXCLUDES(lock_) { - MutexLock l(lock_); + double cpu_time_used() { CHECK(!running_); return cpu_time_used_; } // REQUIRES: timer is not running - double manual_time_used() EXCLUDES(lock_) { - MutexLock l(lock_); + double manual_time_used() { CHECK(!running_); return manual_time_used_; } private: - Mutex lock_; - Condition phase_condition_; - int num_threads_; - int running_threads_; - Notification* done_; - - bool running_; // Is the timer running - double start_real_time_; // If running_ - double start_cpu_time_; // If running_ + bool running_ = false; // Is the timer running + double start_real_time_ = 0; // If running_ + double start_cpu_time_ = 0; // If running_ // Accumulated time so far (does not contain current slice if running_) - double real_time_used_; - double cpu_time_used_; + double real_time_used_ = 0; + double cpu_time_used_ = 0; // Manually set iteration time. User sets this with SetIterationTime(seconds). - double manual_time_used_; - - // How many threads have called Finalize() - int num_finalized_; - - // State for barrier management - int phase_number_; - int entered_; // Number of threads that have entered this barrier - - void InternalStop() REQUIRES(lock_) { - CHECK(running_); - running_ = false; - real_time_used_ += walltime::Now() - start_real_time_; - cpu_time_used_ += ((MyCPUUsage() + ChildrenCPUUsage()) - - start_cpu_time_); - } - - // Enter the barrier and wait until all other threads have also - // entered the barrier. Returns iff this is the last thread to - // enter the barrier. - bool Barrier(MutexLock& ml) REQUIRES(lock_) { - CHECK_LT(entered_, running_threads_); - entered_++; - if (entered_ < running_threads_) { - // Wait for all threads to enter - int phase_number_cp = phase_number_; - auto cb = [this, phase_number_cp]() { - return this->phase_number_ > phase_number_cp || - entered_ == running_threads_; // A thread has aborted in error - }; - phase_condition_.wait(ml.native_handle(), cb); - if (phase_number_ > phase_number_cp) - return false; - // else (running_threads_ == entered_) and we are the last thread. - } - // Last thread has reached the barrier - phase_number_++; - entered_ = 0; - return true; - } + double manual_time_used_ = 0; }; -// TimerManager for current run. -static std::unique_ptr timer_manager = nullptr; - -} // end namespace - -namespace internal { - -enum ReportMode : unsigned { - RM_Unspecified, // The mode has not been manually specified - RM_Default, // The mode is user-specified as default. 
- RM_ReportAggregatesOnly -}; - -// Information kept per benchmark we may want to run -struct Benchmark::Instance { - std::string name; - Benchmark* benchmark; - ReportMode report_mode; - std::vector arg; - TimeUnit time_unit; - int range_multiplier; - bool use_real_time; - bool use_manual_time; - BigO complexity; - BigOFunc* complexity_lambda; - bool last_benchmark_instance; - int repetitions; - double min_time; - int threads; // Number of concurrent threads to use - bool multithreaded; // Is benchmark multi-threaded? -}; - -// Class for managing registered benchmarks. Note that each registered -// benchmark identifies a family of related benchmarks to run. -class BenchmarkFamilies { - public: - static BenchmarkFamilies* GetInstance(); - - // Registers a benchmark family and returns the index assigned to it. - size_t AddBenchmark(std::unique_ptr family); - - // Extract the list of benchmark instances that match the specified - // regular expression. - bool FindBenchmarks(const std::string& re, - std::vector* benchmarks, - std::ostream* Err); - private: - BenchmarkFamilies() {} - - std::vector> families_; - Mutex mutex_; -}; - - -class BenchmarkImp { -public: - explicit BenchmarkImp(const char* name); - ~BenchmarkImp(); - - void Arg(int x); - void Unit(TimeUnit unit); - void Range(int start, int limit); - void DenseRange(int start, int limit, int step = 1); - void Args(const std::vector& args); - void Ranges(const std::vector>& ranges); - void RangeMultiplier(int multiplier); - void MinTime(double n); - void Repetitions(int n); - void ReportAggregatesOnly(bool v); - void UseRealTime(); - void UseManualTime(); - void Complexity(BigO complexity); - void ComplexityLambda(BigOFunc* complexity); - void Threads(int t); - void ThreadRange(int min_threads, int max_threads); - void ThreadPerCpu(); - void SetName(const char* name); - - static void AddRange(std::vector* dst, int lo, int hi, int mult); - - int ArgsCnt() const { return args_.empty() ? 
-1 : static_cast(args_.front().size()); } - -private: - friend class BenchmarkFamilies; - - std::string name_; - ReportMode report_mode_; - std::vector< std::vector > args_; // Args for all benchmark runs - TimeUnit time_unit_; - int range_multiplier_; - double min_time_; - int repetitions_; - bool use_real_time_; - bool use_manual_time_; - BigO complexity_; - BigOFunc* complexity_lambda_; - std::vector thread_counts_; - - BenchmarkImp& operator=(BenchmarkImp const&); -}; - -BenchmarkFamilies* BenchmarkFamilies::GetInstance() { - static BenchmarkFamilies instance; - return &instance; -} - - -size_t BenchmarkFamilies::AddBenchmark(std::unique_ptr family) { - MutexLock l(mutex_); - size_t index = families_.size(); - families_.push_back(std::move(family)); - return index; -} - -bool BenchmarkFamilies::FindBenchmarks( - const std::string& spec, - std::vector* benchmarks, - std::ostream* ErrStream) { - CHECK(ErrStream); - auto& Err = *ErrStream; - // Make regular expression out of command-line flag - std::string error_msg; - Regex re; - if (!re.Init(spec, &error_msg)) { - Err << "Could not compile benchmark re: " << error_msg << std::endl; - return false; - } - - // Special list of thread counts to use when none are specified - const std::vector one_thread = {1}; - - MutexLock l(mutex_); - for (std::unique_ptr& bench_family : families_) { - // Family was deleted or benchmark doesn't match - if (!bench_family) continue; - BenchmarkImp* family = bench_family->imp_; - - if (family->ArgsCnt() == -1) { - family->Args({}); - } - const std::vector* thread_counts = - (family->thread_counts_.empty() - ? &one_thread - : &static_cast&>(family->thread_counts_)); - const size_t family_size = family->args_.size() * thread_counts->size(); - // The benchmark will be run at least 'family_size' different inputs. - // If 'family_size' is very large warn the user. - if (family_size > kMaxFamilySize) { - Err << "The number of inputs is very large. " << family->name_ - << " will be repeated at least " << family_size << " times.\n"; - } - // reserve in the special case the regex ".", since we know the final - // family size. 
- if (spec == ".") - benchmarks->reserve(family_size); - - for (auto const& args : family->args_) { - for (int num_threads : *thread_counts) { - - Benchmark::Instance instance; - instance.name = family->name_; - instance.benchmark = bench_family.get(); - instance.report_mode = family->report_mode_; - instance.arg = args; - instance.time_unit = family->time_unit_; - instance.range_multiplier = family->range_multiplier_; - instance.min_time = family->min_time_; - instance.repetitions = family->repetitions_; - instance.use_real_time = family->use_real_time_; - instance.use_manual_time = family->use_manual_time_; - instance.complexity = family->complexity_; - instance.complexity_lambda = family->complexity_lambda_; - instance.threads = num_threads; - instance.multithreaded = !(family->thread_counts_.empty()); - - // Add arguments to instance name - for (auto const& arg : args) { - AppendHumanReadable(arg, &instance.name); - } - - if (!IsZero(family->min_time_)) { - instance.name += StringPrintF("/min_time:%0.3f", family->min_time_); - } - if (family->repetitions_ != 0) { - instance.name += StringPrintF("/repeats:%d", family->repetitions_); - } - if (family->use_manual_time_) { - instance.name += "/manual_time"; - } else if (family->use_real_time_) { - instance.name += "/real_time"; - } - - // Add the number of threads used to the name - if (!family->thread_counts_.empty()) { - instance.name += StringPrintF("/threads:%d", instance.threads); - } - - if (re.Match(instance.name)) { - instance.last_benchmark_instance = (&args == &family->args_.back()); - benchmarks->push_back(std::move(instance)); - } - } - } - } - return true; -} - -BenchmarkImp::BenchmarkImp(const char* name) - : name_(name), report_mode_(RM_Unspecified), time_unit_(kNanosecond), - range_multiplier_(kRangeMultiplier), min_time_(0.0), repetitions_(0), - use_real_time_(false), use_manual_time_(false), - complexity_(oNone) { -} - -BenchmarkImp::~BenchmarkImp() { -} - -void BenchmarkImp::Arg(int x) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); - args_.push_back({x}); -} - -void BenchmarkImp::Unit(TimeUnit unit) { - time_unit_ = unit; -} - -void BenchmarkImp::Range(int start, int limit) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); - std::vector arglist; - AddRange(&arglist, start, limit, range_multiplier_); - - for (int i : arglist) { - args_.push_back({i}); - } -} - -void BenchmarkImp::DenseRange(int start, int limit, int step) { - CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); - CHECK_GE(start, 0); - CHECK_LE(start, limit); - for (int arg = start; arg <= limit; arg+= step) { - args_.push_back({arg}); - } -} - -void BenchmarkImp::Args(const std::vector& args) -{ - args_.push_back(args); -} - -void BenchmarkImp::Ranges(const std::vector>& ranges) { - std::vector> arglists(ranges.size()); - std::size_t total = 1; - for (std::size_t i = 0; i < ranges.size(); i++) { - AddRange(&arglists[i], ranges[i].first, ranges[i].second, range_multiplier_); - total *= arglists[i].size(); - } - - std::vector ctr(arglists.size(), 0); - - for (std::size_t i = 0; i < total; i++) { - std::vector tmp; - tmp.reserve(arglists.size()); - - for (std::size_t j = 0; j < arglists.size(); j++) { - tmp.push_back(arglists[j].at(ctr[j])); - } - - args_.push_back(std::move(tmp)); - - for (std::size_t j = 0; j < arglists.size(); j++) { - if (ctr[j] + 1 < arglists[j].size()) { - ++ctr[j]; - break; - } - ctr[j] = 0; - } - } -} - -void BenchmarkImp::RangeMultiplier(int multiplier) { - CHECK(multiplier > 1); - range_multiplier_ = multiplier; -} - -void 
BenchmarkImp::MinTime(double t) { - CHECK(t > 0.0); - min_time_ = t; -} - - -void BenchmarkImp::Repetitions(int n) { - CHECK(n > 0); - repetitions_ = n; -} - -void BenchmarkImp::ReportAggregatesOnly(bool value) { - report_mode_ = value ? RM_ReportAggregatesOnly : RM_Default; -} - -void BenchmarkImp::UseRealTime() { - CHECK(!use_manual_time_) << "Cannot set UseRealTime and UseManualTime simultaneously."; - use_real_time_ = true; -} - -void BenchmarkImp::UseManualTime() { - CHECK(!use_real_time_) << "Cannot set UseRealTime and UseManualTime simultaneously."; - use_manual_time_ = true; -} - -void BenchmarkImp::Complexity(BigO complexity){ - complexity_ = complexity; -} - -void BenchmarkImp::ComplexityLambda(BigOFunc* complexity) { - complexity_lambda_ = complexity; -} - -void BenchmarkImp::Threads(int t) { - CHECK_GT(t, 0); - thread_counts_.push_back(t); -} - -void BenchmarkImp::ThreadRange(int min_threads, int max_threads) { - CHECK_GT(min_threads, 0); - CHECK_GE(max_threads, min_threads); - - AddRange(&thread_counts_, min_threads, max_threads, 2); -} - -void BenchmarkImp::ThreadPerCpu() { - static int num_cpus = NumCPUs(); - thread_counts_.push_back(num_cpus); -} - -void BenchmarkImp::SetName(const char* name) { - name_ = name; -} - -void BenchmarkImp::AddRange(std::vector* dst, int lo, int hi, int mult) { - CHECK_GE(lo, 0); - CHECK_GE(hi, lo); - CHECK_GE(mult, 2); - - // Add "lo" - dst->push_back(lo); - - static const int kint32max = std::numeric_limits::max(); - - // Now space out the benchmarks in multiples of "mult" - for (int32_t i = 1; i < kint32max/mult; i *= mult) { - if (i >= hi) break; - if (i > lo) { - dst->push_back(i); - } - } - // Add "hi" (if different from "lo") - if (hi != lo) { - dst->push_back(hi); - } -} - -Benchmark::Benchmark(const char* name) - : imp_(new BenchmarkImp(name)) -{ -} - -Benchmark::~Benchmark() { - delete imp_; -} - -Benchmark::Benchmark(Benchmark const& other) - : imp_(new BenchmarkImp(*other.imp_)) -{ -} - -Benchmark* Benchmark::Arg(int x) { - CHECK(imp_->ArgsCnt() == -1 || imp_->ArgsCnt() == 1); - imp_->Arg(x); - return this; -} - -Benchmark* Benchmark::Unit(TimeUnit unit) { - imp_->Unit(unit); - return this; -} - -Benchmark* Benchmark::Range(int start, int limit) { - CHECK(imp_->ArgsCnt() == -1 || imp_->ArgsCnt() == 1); - imp_->Range(start, limit); - return this; -} - -Benchmark* Benchmark::Ranges(const std::vector>& ranges) -{ - CHECK(imp_->ArgsCnt() == -1 || imp_->ArgsCnt() == static_cast(ranges.size())); - imp_->Ranges(ranges); - return this; -} - -Benchmark* Benchmark::DenseRange(int start, int limit, int step) { - CHECK(imp_->ArgsCnt() == -1 || imp_->ArgsCnt() == 1); - imp_->DenseRange(start, limit, step); - return this; -} - -Benchmark* Benchmark::Args(const std::vector& args) { - CHECK(imp_->ArgsCnt() == -1 || imp_->ArgsCnt() == static_cast(args.size())); - imp_->Args(args); - return this; -} - -Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) { - custom_arguments(this); - return this; -} - -Benchmark* Benchmark::RangeMultiplier(int multiplier) { - imp_->RangeMultiplier(multiplier); - return this; -} - - -Benchmark* Benchmark::Repetitions(int t) { - imp_->Repetitions(t); - return this; -} - -Benchmark* Benchmark::ReportAggregatesOnly(bool value) { - imp_->ReportAggregatesOnly(value); - return this; -} - -Benchmark* Benchmark::MinTime(double t) { - imp_->MinTime(t); - return this; -} - -Benchmark* Benchmark::UseRealTime() { - imp_->UseRealTime(); - return this; -} - -Benchmark* Benchmark::UseManualTime() { - 
imp_->UseManualTime(); - return this; -} - -Benchmark* Benchmark::Complexity(BigO complexity) { - imp_->Complexity(complexity); - return this; -} - -Benchmark* Benchmark::Complexity(BigOFunc* complexity) { - imp_->Complexity(oLambda); - imp_->ComplexityLambda(complexity); - return this; -} - -Benchmark* Benchmark::Threads(int t) { - imp_->Threads(t); - return this; -} - -Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) { - imp_->ThreadRange(min_threads, max_threads); - return this; -} - -Benchmark* Benchmark::ThreadPerCpu() { - imp_->ThreadPerCpu(); - return this; -} - -void Benchmark::SetName(const char* name) { - imp_->SetName(name); -} - -void FunctionBenchmark::Run(State& st) { - func_(st); -} - -} // end namespace internal - namespace { +BenchmarkReporter::Run CreateRunReport( + const benchmark::internal::Benchmark::Instance& b, + const internal::ThreadManager::Result& results, size_t iters, + double seconds) { + // Create report about this benchmark run. + BenchmarkReporter::Run report; + + report.benchmark_name = b.name; + report.error_occurred = results.has_error_; + report.error_message = results.error_message_; + report.report_label = results.report_label_; + // Report the total iterations across all threads. + report.iterations = static_cast(iters) * b.threads; + report.time_unit = b.time_unit; + + if (!report.error_occurred) { + double bytes_per_second = 0; + if (results.bytes_processed > 0 && seconds > 0.0) { + bytes_per_second = (results.bytes_processed / seconds); + } + double items_per_second = 0; + if (results.items_processed > 0 && seconds > 0.0) { + items_per_second = (results.items_processed / seconds); + } + + if (b.use_manual_time) { + report.real_accumulated_time = results.manual_time_used; + } else { + report.real_accumulated_time = results.real_time_used; + } + report.cpu_accumulated_time = results.cpu_time_used; + report.bytes_per_second = bytes_per_second; + report.items_per_second = items_per_second; + report.complexity_n = results.complexity_n; + report.complexity = b.complexity; + report.complexity_lambda = b.complexity_lambda; + } + return report; +} + // Execute one thread of benchmark b for the specified number of iterations. // Adds the stats collected for the thread into *total. 
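CreateRunReport above reduces the throughput fields to a guarded division: the byte and item counters accumulated across all threads, divided by whichever elapsed-seconds value the benchmark selected (CPU, real, or manual time). A sketch of just that arithmetic, with illustrative names:

```c++
#include <cstdint>

struct Throughput {
  double bytes_per_second = 0;
  double items_per_second = 0;
};

// Zero-length timings and zero counters deliberately report no throughput,
// mirroring the guards in CreateRunReport.
Throughput ComputeThroughput(int64_t bytes, int64_t items, double seconds) {
  Throughput t;
  if (seconds > 0.0) {
    if (bytes > 0) t.bytes_per_second = bytes / seconds;
    if (items > 0) t.items_per_second = items / seconds;
  }
  return t;
}
```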
void RunInThread(const benchmark::internal::Benchmark::Instance* b, size_t iters, int thread_id, - ThreadStats* total) EXCLUDES(GetBenchmarkLock()) { - State st(iters, b->arg, thread_id, b->threads); + internal::ThreadManager* manager) { + internal::ThreadTimer timer; + State st(iters, b->arg, thread_id, b->threads, &timer, manager); b->benchmark->Run(st); - CHECK(st.iterations() == st.max_iterations) << - "Benchmark returned before State::KeepRunning() returned false!"; + CHECK(st.iterations() == st.max_iterations) + << "Benchmark returned before State::KeepRunning() returned false!"; { - MutexLock l(GetBenchmarkLock()); - total->bytes_processed += st.bytes_processed(); - total->items_processed += st.items_processed(); - total->complexity_n += st.complexity_length_n(); + MutexLock l(manager->GetBenchmarkMutex()); + internal::ThreadManager::Result& results = manager->results; + results.cpu_time_used += timer.cpu_time_used(); + results.real_time_used += timer.real_time_used(); + results.manual_time_used += timer.manual_time_used(); + results.bytes_processed += st.bytes_processed(); + results.items_processed += st.items_processed(); + results.complexity_n += st.complexity_length_n(); } - - timer_manager->Finalize(); + manager->NotifyThreadComplete(); } -std::vector -RunBenchmark(const benchmark::internal::Benchmark::Instance& b, - std::vector* complexity_reports) - EXCLUDES(GetBenchmarkLock()) { - std::vector reports; // return value +std::vector RunBenchmark( + const benchmark::internal::Benchmark::Instance& b, + std::vector* complexity_reports) { + std::vector reports; // return value size_t iters = 1; - - std::vector pool; - if (b.multithreaded) - pool.resize(b.threads); - - const int repeats = b.repetitions != 0 ? b.repetitions - : FLAGS_benchmark_repetitions; - const bool report_aggregates_only = repeats != 1 && + std::unique_ptr manager; + std::vector pool(b.threads - 1); + const int repeats = + b.repetitions != 0 ? b.repetitions : FLAGS_benchmark_repetitions; + const bool report_aggregates_only = + repeats != 1 && (b.report_mode == internal::RM_Unspecified - ? FLAGS_benchmark_report_aggregates_only - : b.report_mode == internal::RM_ReportAggregatesOnly); + ? FLAGS_benchmark_report_aggregates_only + : b.report_mode == internal::RM_ReportAggregatesOnly); for (int i = 0; i < repeats; i++) { - std::string mem; for (;;) { // Try benchmark VLOG(2) << "Running " << b.name << " for " << iters << "\n"; + manager.reset(new internal::ThreadManager(b.threads)); + for (std::size_t ti = 0; ti < pool.size(); ++ti) { + pool[ti] = std::thread(&RunInThread, &b, iters, + static_cast(ti + 1), manager.get()); + } + RunInThread(&b, iters, 0, manager.get()); + manager->WaitForAllThreads(); + for (std::thread& thread : pool) thread.join(); + internal::ThreadManager::Result results; { - MutexLock l(GetBenchmarkLock()); - GetReportLabel()->clear(); + MutexLock l(manager->GetBenchmarkMutex()); + results = manager->results; } - error_message = nullptr; + manager.reset(); + // Adjust real/manual time stats since they were reported per thread. + results.real_time_used /= b.threads; + results.manual_time_used /= b.threads; - Notification done; - timer_manager = std::unique_ptr(new TimerManager(b.threads, &done)); - - ThreadStats total; - running_benchmark = true; - if (b.multithreaded) { - // If this is out first iteration of the while(true) loop then the - // threads haven't been started and can't be joined. Otherwise we need - // to join the thread before replacing them. 
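Note the pool sizing in the new runner: it creates b.threads - 1 workers and reuses the calling thread as thread 0, so a single-threaded benchmark spawns no threads at all. A reduced sketch of that pattern, where WorkFn is a stand-in for RunInThread:

```c++
#include <cstdio>
#include <thread>
#include <vector>

void WorkFn(int thread_id) { std::printf("running as thread %d\n", thread_id); }

void RunOnNThreads(int n_threads) {
  // Only n_threads - 1 std::threads are created...
  std::vector<std::thread> pool(n_threads - 1);
  for (std::size_t ti = 0; ti < pool.size(); ++ti)
    pool[ti] = std::thread(WorkFn, static_cast<int>(ti + 1));
  // ...because the calling thread doubles as thread 0.
  WorkFn(0);
  for (std::thread& thread : pool) thread.join();
}

int main() { RunOnNThreads(4); }
```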
- for (std::thread& thread : pool) { - if (thread.joinable()) - thread.join(); - } - for (std::size_t ti = 0; ti < pool.size(); ++ti) { - pool[ti] = std::thread(&RunInThread, &b, iters, static_cast(ti), &total); - } - } else { - // Run directly in this thread - RunInThread(&b, iters, 0, &total); - } - done.WaitForNotification(); - running_benchmark = false; - - const double cpu_accumulated_time = timer_manager->cpu_time_used(); - const double real_accumulated_time = timer_manager->real_time_used(); - const double manual_accumulated_time = timer_manager->manual_time_used(); - timer_manager.reset(); - - VLOG(2) << "Ran in " << cpu_accumulated_time << "/" - << real_accumulated_time << "\n"; + VLOG(2) << "Ran in " << results.cpu_time_used << "/" + << results.real_time_used << "\n"; // Base decisions off of real time if requested by this benchmark. - double seconds = cpu_accumulated_time; + double seconds = results.cpu_time_used; if (b.use_manual_time) { - seconds = manual_accumulated_time; + seconds = results.manual_time_used; } else if (b.use_real_time) { - seconds = real_accumulated_time; + seconds = results.real_time_used; } - std::string label; - { - MutexLock l(GetBenchmarkLock()); - label = *GetReportLabel(); - } - error_message_type error_msg = error_message; - - const double min_time = !IsZero(b.min_time) ? b.min_time - : FLAGS_benchmark_min_time; - + const double min_time = + !IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time; // If this was the first run, was elapsed time or cpu time large enough? // If this is not the first run, go with the current value of iter. - if ((i > 0) || (error_msg != nullptr) || - (iters >= kMaxIterations) || - (seconds >= min_time) || - (real_accumulated_time >= 5*min_time)) { - - // Create report about this benchmark run. - BenchmarkReporter::Run report; - report.benchmark_name = b.name; - report.error_occurred = error_msg != nullptr; - report.error_message = error_msg != nullptr ? error_msg : ""; - report.report_label = label; - // Report the total iterations across all threads. 
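The loop here accepts a measurement once any stopping condition holds; otherwise the iteration count is scaled up and the benchmark re-run. The acceptance test in isolation (kMaxIterations here is a placeholder, not the library's actual constant):

```c++
#include <cstddef>

const size_t kMaxIterations = 1000000000;  // placeholder cap

// A run is kept if it is a repetition, an error occurred, the iteration cap
// was reached, or the run was long enough in the chosen or real time base.
bool AcceptRun(int repetition, bool has_error, size_t iters, double seconds,
               double real_seconds, double min_time) {
  return (repetition > 0) || has_error || (iters >= kMaxIterations) ||
         (seconds >= min_time) || (real_seconds >= 5 * min_time);
}
```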
- report.iterations = static_cast(iters) * b.threads; - report.time_unit = b.time_unit; - - if (!report.error_occurred) { - double bytes_per_second = 0; - if (total.bytes_processed > 0 && seconds > 0.0) { - bytes_per_second = (total.bytes_processed / seconds); - } - double items_per_second = 0; - if (total.items_processed > 0 && seconds > 0.0) { - items_per_second = (total.items_processed / seconds); - } - - if (b.use_manual_time) { - report.real_accumulated_time = manual_accumulated_time; - } else { - report.real_accumulated_time = real_accumulated_time; - } - report.cpu_accumulated_time = cpu_accumulated_time; - report.bytes_per_second = bytes_per_second; - report.items_per_second = items_per_second; - report.complexity_n = total.complexity_n; - report.complexity = b.complexity; - report.complexity_lambda = b.complexity_lambda; - if(report.complexity != oNone) - complexity_reports->push_back(report); - } - + if ((i > 0) || results.has_error_ || (iters >= kMaxIterations) || + (seconds >= min_time) || (results.real_time_used >= 5 * min_time)) { + BenchmarkReporter::Run report = + CreateRunReport(b, results, iters, seconds); + if (!report.error_occurred && b.complexity != oNone) + complexity_reports->push_back(report); reports.push_back(report); break; } @@ -961,16 +358,12 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b, iters = static_cast(next_iters + 0.5); } } - if (b.multithreaded) { - for (std::thread& thread : pool) - thread.join(); - } // Calculate additional statistics auto stat_reports = ComputeStats(reports); - if((b.complexity != oNone) && b.last_benchmark_instance) { + if ((b.complexity != oNone) && b.last_benchmark_instance) { auto additional_run_stats = ComputeBigO(*complexity_reports); stat_reports.insert(stat_reports.end(), additional_run_stats.begin(), - additional_run_stats.end()); + additional_run_stats.end()); complexity_reports->clear(); } @@ -980,62 +373,84 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b, } } // namespace +} // namespace internal -State::State(size_t max_iters, const std::vector& ranges, - int thread_i, int n_threads) - : started_(false), finished_(false), total_iterations_(0), +State::State(size_t max_iters, const std::vector& ranges, int thread_i, + int n_threads, internal::ThreadTimer* timer, + internal::ThreadManager* manager) + : started_(false), + finished_(false), + total_iterations_(0), range_(ranges), - bytes_processed_(0), items_processed_(0), + bytes_processed_(0), + items_processed_(0), complexity_n_(0), error_occurred_(false), thread_index(thread_i), threads(n_threads), - max_iterations(max_iters) -{ - CHECK(max_iterations != 0) << "At least one iteration must be run"; - CHECK_LT(thread_index, threads) << "thread_index must be less than threads"; + max_iterations(max_iters), + timer_(timer), + manager_(manager) { + CHECK(max_iterations != 0) << "At least one iteration must be run"; + CHECK_LT(thread_index, threads) << "thread_index must be less than threads"; } void State::PauseTiming() { // Add in time accumulated so far - CHECK(running_benchmark); CHECK(started_ && !finished_ && !error_occurred_); - timer_manager->StopTimer(); + timer_->StopTimer(); } void State::ResumeTiming() { - CHECK(running_benchmark); CHECK(started_ && !finished_ && !error_occurred_); - timer_manager->StartTimer(); + timer_->StartTimer(); } void State::SkipWithError(const char* msg) { CHECK(msg); error_occurred_ = true; - error_message_type expected_no_error_msg = nullptr; - 
error_message.compare_exchange_weak(expected_no_error_msg, - const_cast(msg)); - started_ = finished_ = true; + { + MutexLock l(manager_->GetBenchmarkMutex()); + if (manager_->results.has_error_ == false) { + manager_->results.error_message_ = msg; + manager_->results.has_error_ = true; + } + } total_iterations_ = max_iterations; - timer_manager->RemoveErroredThread(); + if (timer_->running()) timer_->StopTimer(); } -void State::SetIterationTime(double seconds) -{ - CHECK(running_benchmark); - timer_manager->SetIterationTime(seconds); +void State::SetIterationTime(double seconds) { + timer_->SetIterationTime(seconds); } void State::SetLabel(const char* label) { - CHECK(running_benchmark); - MutexLock l(GetBenchmarkLock()); - *GetReportLabel() = label; + MutexLock l(manager_->GetBenchmarkMutex()); + manager_->results.report_label_ = label; +} + +void State::StartKeepRunning() { + CHECK(!started_ && !finished_); + started_ = true; + manager_->StartStopBarrier(); + if (!error_occurred_) ResumeTiming(); +} + +void State::FinishKeepRunning() { + CHECK(started_ && (!finished_ || error_occurred_)); + if (!error_occurred_) { + PauseTiming(); + } + // Total iterations now is one greater than max iterations. Fix this. + total_iterations_ = max_iterations; + finished_ = true; + manager_->StartStopBarrier(); } namespace internal { namespace { -void RunMatchingBenchmarks(const std::vector& benchmarks, +void RunBenchmarks(const std::vector& benchmarks, BenchmarkReporter* console_reporter, BenchmarkReporter* file_reporter) { // Note the file_reporter can be null. @@ -1049,8 +464,7 @@ void RunMatchingBenchmarks(const std::vector& benchmarks, std::max(name_field_width, benchmark.name.size()); has_repetitions |= benchmark.repetitions > 1; } - if (has_repetitions) - name_field_width += std::strlen("_stddev"); + if (has_repetitions) name_field_width += std::strlen("_stddev"); // Print header here BenchmarkReporter::Context context; @@ -1063,21 +477,35 @@ void RunMatchingBenchmarks(const std::vector& benchmarks, // Keep track of runing times of all instances of current benchmark std::vector complexity_reports; - if (console_reporter->ReportContext(context) - && (!file_reporter || file_reporter->ReportContext(context))) { + // We flush streams after invoking reporter methods that write to them. This + // ensures users get timely updates even when streams are not line-buffered. 
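SkipWithError above now publishes only the first error reported by any thread, replacing the old atomic compare-exchange with a mutex-guarded check. The "first writer wins" rule in isolation, with illustrative types:

```c++
#include <mutex>
#include <string>

struct SharedResults {
  std::mutex mutex;
  bool has_error = false;
  std::string error_message;
};

// Errors reported by later threads are dropped: the first writer wins.
void ReportError(SharedResults* results, const char* msg) {
  std::lock_guard<std::mutex> l(results->mutex);
  if (!results->has_error) {
    results->error_message = msg;
    results->has_error = true;
  }
}
```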
+ auto flushStreams = [](BenchmarkReporter* reporter) { + if (!reporter) return; + std::flush(reporter->GetOutputStream()); + std::flush(reporter->GetErrorStream()); + }; + + if (console_reporter->ReportContext(context) && + (!file_reporter || file_reporter->ReportContext(context))) { + flushStreams(console_reporter); + flushStreams(file_reporter); for (const auto& benchmark : benchmarks) { std::vector reports = RunBenchmark(benchmark, &complexity_reports); console_reporter->ReportRuns(reports); if (file_reporter) file_reporter->ReportRuns(reports); + flushStreams(console_reporter); + flushStreams(file_reporter); } } console_reporter->Finalize(); if (file_reporter) file_reporter->Finalize(); + flushStreams(console_reporter); + flushStreams(file_reporter); } -std::unique_ptr -CreateReporter(std::string const& name, ConsoleReporter::OutputOptions allow_color) { +std::unique_ptr CreateReporter( + std::string const& name, ConsoleReporter::OutputOptions allow_color) { typedef std::unique_ptr PtrType; if (name == "console") { return PtrType(new ConsoleReporter(allow_color)); @@ -1091,19 +519,17 @@ CreateReporter(std::string const& name, ConsoleReporter::OutputOptions allow_col } } -} // end namespace -} // end namespace internal +} // end namespace +} // end namespace internal size_t RunSpecifiedBenchmarks() { return RunSpecifiedBenchmarks(nullptr, nullptr); } - size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter) { return RunSpecifiedBenchmarks(console_reporter, nullptr); } - size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter, BenchmarkReporter* file_reporter) { std::string spec = FLAGS_benchmark_filter; @@ -1115,10 +541,16 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter, std::unique_ptr default_console_reporter; std::unique_ptr default_file_reporter; if (!console_reporter) { - auto output_opts = FLAGS_color_print ? ConsoleReporter::OO_Color - : ConsoleReporter::OO_None; - default_console_reporter = internal::CreateReporter( - FLAGS_benchmark_format, output_opts); + auto output_opts = ConsoleReporter::OO_None; + if (FLAGS_benchmark_color == "auto") + output_opts = IsColorTerminal() ? ConsoleReporter::OO_Color + : ConsoleReporter::OO_None; + else + output_opts = IsTruthyFlagValue(FLAGS_benchmark_color) + ? ConsoleReporter::OO_Color + : ConsoleReporter::OO_None; + default_console_reporter = + internal::CreateReporter(FLAGS_benchmark_format, output_opts); console_reporter = default_console_reporter.get(); } auto& Out = console_reporter->GetOutputStream(); @@ -1127,7 +559,8 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter, std::string const& fname = FLAGS_benchmark_out; if (fname == "" && file_reporter) { Err << "A custom file reporter was provided but " - "--benchmark_out= was not specified." << std::endl; + "--benchmark_out= was not specified." 
+ << std::endl; std::exit(1); } if (fname != "") { @@ -1138,7 +571,7 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter, } if (!file_reporter) { default_file_reporter = internal::CreateReporter( - FLAGS_benchmark_out_format, ConsoleReporter::OO_None); + FLAGS_benchmark_out_format, ConsoleReporter::OO_None); file_reporter = default_file_reporter.get(); } file_reporter->SetOutputStream(&output_file); @@ -1146,14 +579,17 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter, } std::vector benchmarks; - auto families = internal::BenchmarkFamilies::GetInstance(); - if (!families->FindBenchmarks(spec, &benchmarks, &Err)) return 0; + if (!FindBenchmarksInternal(spec, &benchmarks, &Err)) return 0; + + if (benchmarks.empty()) { + Err << "Failed to match any benchmarks against regex: " << spec << "\n"; + return 0; + } if (FLAGS_benchmark_list_tests) { - for (auto const& benchmark : benchmarks) - Out << benchmark.name << "\n"; + for (auto const& benchmark : benchmarks) Out << benchmark.name << "\n"; } else { - internal::RunMatchingBenchmarks(benchmarks, console_reporter, file_reporter); + internal::RunBenchmarks(benchmarks, console_reporter, file_reporter); } return benchmarks.size(); @@ -1172,7 +608,7 @@ void PrintUsageAndExit() { " [--benchmark_format=]\n" " [--benchmark_out=]\n" " [--benchmark_out_format=]\n" - " [--color_print={true|false}]\n" + " [--benchmark_color={auto|true|false}]\n" " [--v=]\n"); exit(0); } @@ -1180,25 +616,23 @@ void PrintUsageAndExit() { void ParseCommandLineFlags(int* argc, char** argv) { using namespace benchmark; for (int i = 1; i < *argc; ++i) { - if ( - ParseBoolFlag(argv[i], "benchmark_list_tests", + if (ParseBoolFlag(argv[i], "benchmark_list_tests", &FLAGS_benchmark_list_tests) || - ParseStringFlag(argv[i], "benchmark_filter", - &FLAGS_benchmark_filter) || + ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) || ParseDoubleFlag(argv[i], "benchmark_min_time", &FLAGS_benchmark_min_time) || ParseInt32Flag(argv[i], "benchmark_repetitions", &FLAGS_benchmark_repetitions) || ParseBoolFlag(argv[i], "benchmark_report_aggregates_only", - &FLAGS_benchmark_report_aggregates_only) || - ParseStringFlag(argv[i], "benchmark_format", - &FLAGS_benchmark_format) || - ParseStringFlag(argv[i], "benchmark_out", - &FLAGS_benchmark_out) || + &FLAGS_benchmark_report_aggregates_only) || + ParseStringFlag(argv[i], "benchmark_format", &FLAGS_benchmark_format) || + ParseStringFlag(argv[i], "benchmark_out", &FLAGS_benchmark_out) || ParseStringFlag(argv[i], "benchmark_out_format", &FLAGS_benchmark_out_format) || - ParseBoolFlag(argv[i], "color_print", - &FLAGS_color_print) || + ParseStringFlag(argv[i], "benchmark_color", &FLAGS_benchmark_color) || + // "color_print" is the deprecated name for "benchmark_color". + // TODO: Remove this. 
+        ParseStringFlag(argv[i], "color_print", &FLAGS_benchmark_color) ||
         ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
       for (int j = i; j != *argc; ++j) argv[j] = argv[j + 1];
@@ -1208,36 +642,26 @@ void ParseCommandLineFlags(int* argc, char** argv) {
       PrintUsageAndExit();
     }
   }
-  for (auto const* flag : {&FLAGS_benchmark_format,
-                           &FLAGS_benchmark_out_format})
-  if (*flag != "console" && *flag != "json" && *flag != "csv") {
+  for (auto const* flag :
+       {&FLAGS_benchmark_format, &FLAGS_benchmark_out_format})
+    if (*flag != "console" && *flag != "json" && *flag != "csv") {
+      PrintUsageAndExit();
+    }
+  if (FLAGS_benchmark_color.empty()) {
     PrintUsageAndExit();
   }
 }
 
-Benchmark* RegisterBenchmarkInternal(Benchmark* bench) {
-  std::unique_ptr<Benchmark> bench_ptr(bench);
-  BenchmarkFamilies* families = BenchmarkFamilies::GetInstance();
-  families->AddBenchmark(std::move(bench_ptr));
-  return bench;
-}
-
 int InitializeStreams() {
-    static std::ios_base::Init init;
-    return 0;
+  static std::ios_base::Init init;
+  return 0;
 }
 
-} // end namespace internal
+}  // end namespace internal
 
 void Initialize(int* argc, char** argv) {
   internal::ParseCommandLineFlags(argc, argv);
-  internal::SetLogLevel(FLAGS_v);
-  // TODO remove this. It prints some output the first time it is called.
-  // We don't want to have this ouput printed during benchmarking.
-  MyCPUUsage();
-  // The first call to walltime::Now initialized it. Call it once to
-  // prevent the initialization from happening in a benchmark.
-  walltime::Now();
+  internal::LogLevel() = FLAGS_v;
}
 
-} // end namespace benchmark
+}  // end namespace benchmark
diff --git a/utils/google-benchmark/src/benchmark_api_internal.h b/utils/google-benchmark/src/benchmark_api_internal.h
new file mode 100644
index 000000000..8b97ce600
--- /dev/null
+++ b/utils/google-benchmark/src/benchmark_api_internal.h
@@ -0,0 +1,47 @@
+#ifndef BENCHMARK_API_INTERNAL_H
+#define BENCHMARK_API_INTERNAL_H
+
+#include "benchmark/benchmark_api.h"
+
+#include <cmath>
+#include <iosfwd>
+#include <limits>
+#include <string>
+#include <vector>
+
+namespace benchmark {
+namespace internal {
+
+// Information kept per benchmark we may want to run
+struct Benchmark::Instance {
+  std::string name;
+  Benchmark* benchmark;
+  ReportMode report_mode;
+  std::vector<int> arg;
+  TimeUnit time_unit;
+  int range_multiplier;
+  bool use_real_time;
+  bool use_manual_time;
+  BigO complexity;
+  BigOFunc* complexity_lambda;
+  bool last_benchmark_instance;
+  int repetitions;
+  double min_time;
+  int threads;  // Number of concurrent threads to use
+};
+
+bool FindBenchmarksInternal(const std::string& re,
+                            std::vector<Benchmark::Instance>* benchmarks,
+                            std::ostream* Err);
+
+namespace {
+
+bool IsZero(double n) {
+  return std::abs(n) < std::numeric_limits<double>::epsilon();
+}
+
+}  // end namespace
+}  // end namespace internal
+}  // end namespace benchmark
+
+#endif  // BENCHMARK_API_INTERNAL_H
diff --git a/utils/google-benchmark/src/benchmark_register.cc b/utils/google-benchmark/src/benchmark_register.cc
new file mode 100644
index 000000000..4e580d8ea
--- /dev/null
+++ b/utils/google-benchmark/src/benchmark_register.cc
@@ -0,0 +1,439 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "benchmark/benchmark.h"
+#include "benchmark_api_internal.h"
+#include "internal_macros.h"
+
+#ifndef BENCHMARK_OS_WINDOWS
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <unistd.h>
+#endif
+
+#include <algorithm>
+#include <atomic>
+#include <condition_variable>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <thread>
+
+#include "check.h"
+#include "commandlineflags.h"
+#include "complexity.h"
+#include "log.h"
+#include "mutex.h"
+#include "re.h"
+#include "stat.h"
+#include "string_util.h"
+#include "sysinfo.h"
+#include "timers.h"
+
+namespace benchmark {
+
+namespace {
+// For non-dense Range, intermediate values are powers of kRangeMultiplier.
+static const int kRangeMultiplier = 8;
+// The size of a benchmark family determines the number of inputs to repeat
+// the benchmark on. If this is "large" then warn the user during configuration.
+static const size_t kMaxFamilySize = 100;
+}  // end namespace
+
+namespace internal {
+
+//=============================================================================//
+//                         BenchmarkFamilies
+//=============================================================================//
+
+// Class for managing registered benchmarks. Note that each registered
+// benchmark identifies a family of related benchmarks to run.
+class BenchmarkFamilies {
+ public:
+  static BenchmarkFamilies* GetInstance();
+
+  // Registers a benchmark family and returns the index assigned to it.
+  size_t AddBenchmark(std::unique_ptr<Benchmark> family);
+
+  // Extract the list of benchmark instances that match the specified
+  // regular expression.
+  bool FindBenchmarks(const std::string& re,
+                      std::vector<Benchmark::Instance>* benchmarks,
+                      std::ostream* Err);
+
+ private:
+  BenchmarkFamilies() {}
+
+  std::vector<std::unique_ptr<Benchmark>> families_;
+  Mutex mutex_;
+};
+
+BenchmarkFamilies* BenchmarkFamilies::GetInstance() {
+  static BenchmarkFamilies instance;
+  return &instance;
+}
+
+size_t BenchmarkFamilies::AddBenchmark(std::unique_ptr<Benchmark> family) {
+  MutexLock l(mutex_);
+  size_t index = families_.size();
+  families_.push_back(std::move(family));
+  return index;
+}
+
+bool BenchmarkFamilies::FindBenchmarks(
+    const std::string& spec, std::vector<Benchmark::Instance>* benchmarks,
+    std::ostream* ErrStream) {
+  CHECK(ErrStream);
+  auto& Err = *ErrStream;
+  // Make regular expression out of command-line flag
+  std::string error_msg;
+  Regex re;
+  if (!re.Init(spec, &error_msg)) {
+    Err << "Could not compile benchmark re: " << error_msg << std::endl;
+    return false;
+  }
+
+  // Special list of thread counts to use when none are specified
+  const std::vector<int> one_thread = {1};
+
+  MutexLock l(mutex_);
+  for (std::unique_ptr<Benchmark>& family : families_) {
+    // Family was deleted or benchmark doesn't match
+    if (!family) continue;
+
+    if (family->ArgsCnt() == -1) {
+      family->Args({});
+    }
+    const std::vector<int>* thread_counts =
+        (family->thread_counts_.empty()
+             ? &one_thread
+             : &static_cast<const std::vector<int>&>(family->thread_counts_));
+    const size_t family_size = family->args_.size() * thread_counts->size();
+    // The benchmark will be run with at least 'family_size' different inputs.
+    // If 'family_size' is very large warn the user.
+    if (family_size > kMaxFamilySize) {
+      Err << "The number of inputs is very large. 
" << family->name_ + << " will be repeated at least " << family_size << " times.\n"; + } + // reserve in the special case the regex ".", since we know the final + // family size. + if (spec == ".") benchmarks->reserve(family_size); + + for (auto const& args : family->args_) { + for (int num_threads : *thread_counts) { + Benchmark::Instance instance; + instance.name = family->name_; + instance.benchmark = family.get(); + instance.report_mode = family->report_mode_; + instance.arg = args; + instance.time_unit = family->time_unit_; + instance.range_multiplier = family->range_multiplier_; + instance.min_time = family->min_time_; + instance.repetitions = family->repetitions_; + instance.use_real_time = family->use_real_time_; + instance.use_manual_time = family->use_manual_time_; + instance.complexity = family->complexity_; + instance.complexity_lambda = family->complexity_lambda_; + instance.threads = num_threads; + + // Add arguments to instance name + size_t arg_i = 0; + for (auto const& arg : args) { + instance.name += "/"; + + if (arg_i < family->arg_names_.size()) { + const auto& arg_name = family->arg_names_[arg_i]; + if (!arg_name.empty()) { + instance.name += + StringPrintF("%s:", family->arg_names_[arg_i].c_str()); + } + } + + AppendHumanReadable(arg, &instance.name); + ++arg_i; + } + + if (!IsZero(family->min_time_)) { + instance.name += StringPrintF("/min_time:%0.3f", family->min_time_); + } + if (family->repetitions_ != 0) { + instance.name += StringPrintF("/repeats:%d", family->repetitions_); + } + if (family->use_manual_time_) { + instance.name += "/manual_time"; + } else if (family->use_real_time_) { + instance.name += "/real_time"; + } + + // Add the number of threads used to the name + if (!family->thread_counts_.empty()) { + instance.name += StringPrintF("/threads:%d", instance.threads); + } + + if (re.Match(instance.name)) { + instance.last_benchmark_instance = (&args == &family->args_.back()); + benchmarks->push_back(std::move(instance)); + } + } + } + } + return true; +} + +Benchmark* RegisterBenchmarkInternal(Benchmark* bench) { + std::unique_ptr bench_ptr(bench); + BenchmarkFamilies* families = BenchmarkFamilies::GetInstance(); + families->AddBenchmark(std::move(bench_ptr)); + return bench; +} + +// FIXME: This function is a hack so that benchmark.cc can access +// `BenchmarkFamilies` +bool FindBenchmarksInternal(const std::string& re, + std::vector* benchmarks, + std::ostream* Err) { + return BenchmarkFamilies::GetInstance()->FindBenchmarks(re, benchmarks, Err); +} + +//=============================================================================// +// Benchmark +//=============================================================================// + +Benchmark::Benchmark(const char* name) + : name_(name), + report_mode_(RM_Unspecified), + time_unit_(kNanosecond), + range_multiplier_(kRangeMultiplier), + min_time_(0), + repetitions_(0), + use_real_time_(false), + use_manual_time_(false), + complexity_(oNone), + complexity_lambda_(nullptr) {} + +Benchmark::~Benchmark() {} + +void Benchmark::AddRange(std::vector* dst, int lo, int hi, int mult) { + CHECK_GE(lo, 0); + CHECK_GE(hi, lo); + CHECK_GE(mult, 2); + + // Add "lo" + dst->push_back(lo); + + static const int kint32max = std::numeric_limits::max(); + + // Now space out the benchmarks in multiples of "mult" + for (int32_t i = 1; i < kint32max / mult; i *= mult) { + if (i >= hi) break; + if (i > lo) { + dst->push_back(i); + } + } + // Add "hi" (if different from "lo") + if (hi != lo) { + dst->push_back(hi); + } +} + 
+Benchmark* Benchmark::Arg(int x) { + CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); + args_.push_back({x}); + return this; +} + +Benchmark* Benchmark::Unit(TimeUnit unit) { + time_unit_ = unit; + return this; +} + +Benchmark* Benchmark::Range(int start, int limit) { + CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); + std::vector arglist; + AddRange(&arglist, start, limit, range_multiplier_); + + for (int i : arglist) { + args_.push_back({i}); + } + return this; +} + +Benchmark* Benchmark::Ranges(const std::vector>& ranges) { + CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(ranges.size())); + std::vector> arglists(ranges.size()); + std::size_t total = 1; + for (std::size_t i = 0; i < ranges.size(); i++) { + AddRange(&arglists[i], ranges[i].first, ranges[i].second, + range_multiplier_); + total *= arglists[i].size(); + } + + std::vector ctr(arglists.size(), 0); + + for (std::size_t i = 0; i < total; i++) { + std::vector tmp; + tmp.reserve(arglists.size()); + + for (std::size_t j = 0; j < arglists.size(); j++) { + tmp.push_back(arglists[j].at(ctr[j])); + } + + args_.push_back(std::move(tmp)); + + for (std::size_t j = 0; j < arglists.size(); j++) { + if (ctr[j] + 1 < arglists[j].size()) { + ++ctr[j]; + break; + } + ctr[j] = 0; + } + } + return this; +} + +Benchmark* Benchmark::ArgName(const std::string& name) { + CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); + arg_names_ = {name}; + return this; +} + +Benchmark* Benchmark::ArgNames(const std::vector& names) { + CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(names.size())); + arg_names_ = names; + return this; +} + +Benchmark* Benchmark::DenseRange(int start, int limit, int step) { + CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); + CHECK_GE(start, 0); + CHECK_LE(start, limit); + for (int arg = start; arg <= limit; arg += step) { + args_.push_back({arg}); + } + return this; +} + +Benchmark* Benchmark::Args(const std::vector& args) { + CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(args.size())); + args_.push_back(args); + return this; +} + +Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) { + custom_arguments(this); + return this; +} + +Benchmark* Benchmark::RangeMultiplier(int multiplier) { + CHECK(multiplier > 1); + range_multiplier_ = multiplier; + return this; +} + +Benchmark* Benchmark::Repetitions(int n) { + CHECK(n > 0); + repetitions_ = n; + return this; +} + +Benchmark* Benchmark::ReportAggregatesOnly(bool value) { + report_mode_ = value ? 
RM_ReportAggregatesOnly : RM_Default; + return this; +} + +Benchmark* Benchmark::MinTime(double t) { + CHECK(t > 0.0); + min_time_ = t; + return this; +} + +Benchmark* Benchmark::UseRealTime() { + CHECK(!use_manual_time_) + << "Cannot set UseRealTime and UseManualTime simultaneously."; + use_real_time_ = true; + return this; +} + +Benchmark* Benchmark::UseManualTime() { + CHECK(!use_real_time_) + << "Cannot set UseRealTime and UseManualTime simultaneously."; + use_manual_time_ = true; + return this; +} + +Benchmark* Benchmark::Complexity(BigO complexity) { + complexity_ = complexity; + return this; +} + +Benchmark* Benchmark::Complexity(BigOFunc* complexity) { + complexity_lambda_ = complexity; + complexity_ = oLambda; + return this; +} + +Benchmark* Benchmark::Threads(int t) { + CHECK_GT(t, 0); + thread_counts_.push_back(t); + return this; +} + +Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) { + CHECK_GT(min_threads, 0); + CHECK_GE(max_threads, min_threads); + + AddRange(&thread_counts_, min_threads, max_threads, 2); + return this; +} + +Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads, + int stride) { + CHECK_GT(min_threads, 0); + CHECK_GE(max_threads, min_threads); + CHECK_GE(stride, 1); + + for (auto i = min_threads; i < max_threads; i += stride) { + thread_counts_.push_back(i); + } + thread_counts_.push_back(max_threads); + return this; +} + +Benchmark* Benchmark::ThreadPerCpu() { + static int num_cpus = NumCPUs(); + thread_counts_.push_back(num_cpus); + return this; +} + +void Benchmark::SetName(const char* name) { name_ = name; } + +int Benchmark::ArgsCnt() const { + if (args_.empty()) { + if (arg_names_.empty()) return -1; + return static_cast(arg_names_.size()); + } + return static_cast(args_.front().size()); +} + +//=============================================================================// +// FunctionBenchmark +//=============================================================================// + +void FunctionBenchmark::Run(State& st) { func_(st); } + +} // end namespace internal +} // end namespace benchmark diff --git a/utils/google-benchmark/src/check.h b/utils/google-benchmark/src/check.h index 4572babb4..6f1fe0cf8 100644 --- a/utils/google-benchmark/src/check.h +++ b/utils/google-benchmark/src/check.h @@ -13,53 +13,52 @@ namespace internal { typedef void(AbortHandlerT)(); inline AbortHandlerT*& GetAbortHandler() { - static AbortHandlerT* handler = &std::abort; - return handler; + static AbortHandlerT* handler = &std::abort; + return handler; } BENCHMARK_NORETURN inline void CallAbortHandler() { - GetAbortHandler()(); - std::abort(); // fallback to enforce noreturn + GetAbortHandler()(); + std::abort(); // fallback to enforce noreturn } // CheckHandler is the class constructed by failing CHECK macros. CheckHandler // will log information about the failures and abort when it is destructed. class CheckHandler { -public: + public: CheckHandler(const char* check, const char* file, const char* func, int line) - : log_(GetErrorLogInstance()) - { - log_ << file << ":" << line << ": " << func << ": Check `" - << check << "' failed. "; + : log_(GetErrorLogInstance()) { + log_ << file << ":" << line << ": " << func << ": Check `" << check + << "' failed. 
"; } - std::ostream& GetLog() { - return log_; - } + LogType& GetLog() { return log_; } BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) { - log_ << std::endl; - CallAbortHandler(); + log_ << std::endl; + CallAbortHandler(); } - CheckHandler & operator=(const CheckHandler&) = delete; + CheckHandler& operator=(const CheckHandler&) = delete; CheckHandler(const CheckHandler&) = delete; CheckHandler() = delete; -private: - std::ostream& log_; + + private: + LogType& log_; }; -} // end namespace internal -} // end namespace benchmark +} // end namespace internal +} // end namespace benchmark // The CHECK macro returns a std::ostream object that can have extra information // written to it. #ifndef NDEBUG -# define CHECK(b) (b ? ::benchmark::internal::GetNullLogInstance() \ - : ::benchmark::internal::CheckHandler( \ - #b, __FILE__, __func__, __LINE__).GetLog()) +#define CHECK(b) \ + (b ? ::benchmark::internal::GetNullLogInstance() \ + : ::benchmark::internal::CheckHandler(#b, __FILE__, __func__, __LINE__) \ + .GetLog()) #else -# define CHECK(b) ::benchmark::internal::GetNullLogInstance() +#define CHECK(b) ::benchmark::internal::GetNullLogInstance() #endif #define CHECK_EQ(a, b) CHECK((a) == (b)) diff --git a/utils/google-benchmark/src/colorprint.cc b/utils/google-benchmark/src/colorprint.cc index b7d316fea..513376b14 100644 --- a/utils/google-benchmark/src/colorprint.cc +++ b/utils/google-benchmark/src/colorprint.cc @@ -16,16 +16,20 @@ #include #include -#include -#include +#include +#include #include +#include #include "check.h" #include "internal_macros.h" #ifdef BENCHMARK_OS_WINDOWS #include -#endif +#include +#else +#include +#endif // BENCHMARK_OS_WINDOWS namespace benchmark { namespace { @@ -78,7 +82,7 @@ PlatformColorCode GetPlatformColorCode(LogColor color) { } // end namespace -std::string FormatString(const char *msg, va_list args) { +std::string FormatString(const char* msg, va_list args) { // we might need a second shot at this, so pre-emptivly make a copy va_list args_cp; va_copy(args_cp, args); @@ -92,13 +96,13 @@ std::string FormatString(const char *msg, va_list args) { // currently there is no error handling for failure, so this is hack. CHECK(ret >= 0); - if (ret == 0) // handle empty expansion + if (ret == 0) // handle empty expansion return {}; else if (static_cast(ret) < size) return local_buff; else { // we did not provide a long enough buffer on our first attempt. - size = (size_t)ret + 1; // + 1 for the null byte + size = (size_t)ret + 1; // + 1 for the null byte std::unique_ptr buff(new char[size]); ret = std::vsnprintf(buff.get(), size, msg, args); CHECK(ret > 0 && ((size_t)ret) < size); @@ -106,7 +110,7 @@ std::string FormatString(const char *msg, va_list args) { } } -std::string FormatString(const char *msg, ...) { +std::string FormatString(const char* msg, ...) { va_list args; va_start(args, msg); auto tmp = FormatString(msg, args); @@ -121,9 +125,10 @@ void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...) 
{ va_end(args); } -void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, va_list args) { +void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, + va_list args) { #ifdef BENCHMARK_OS_WINDOWS - ((void)out); // suppress unused warning + ((void)out); // suppress unused warning const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE); @@ -148,7 +153,36 @@ void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, va_list arg if (color_code) out << FormatString("\033[0;3%sm", color_code); out << FormatString(fmt, args) << "\033[m"; #endif +} +bool IsColorTerminal() { +#if BENCHMARK_OS_WINDOWS + // On Windows the TERM variable is usually not set, but the + // console there does support colors. + return 0 != _isatty(_fileno(stdout)); +#else + // On non-Windows platforms, we rely on the TERM variable. This list of + // supported TERM values is copied from Google Test: + // . + const char* const SUPPORTED_TERM_VALUES[] = { + "xterm", "xterm-color", "xterm-256color", + "screen", "screen-256color", "tmux", + "tmux-256color", "rxvt-unicode", "rxvt-unicode-256color", + "linux", "cygwin", + }; + + const char* const term = getenv("TERM"); + + bool term_supports_color = false; + for (const char* candidate : SUPPORTED_TERM_VALUES) { + if (term && 0 == strcmp(term, candidate)) { + term_supports_color = true; + break; + } + } + + return 0 != isatty(fileno(stdout)) && term_supports_color; +#endif // BENCHMARK_OS_WINDOWS } } // end namespace benchmark diff --git a/utils/google-benchmark/src/colorprint.h b/utils/google-benchmark/src/colorprint.h index d9247954d..9f6fab9b3 100644 --- a/utils/google-benchmark/src/colorprint.h +++ b/utils/google-benchmark/src/colorprint.h @@ -2,8 +2,8 @@ #define BENCHMARK_COLORPRINT_H_ #include -#include #include +#include namespace benchmark { enum LogColor { @@ -20,9 +20,14 @@ enum LogColor { std::string FormatString(const char* msg, va_list args); std::string FormatString(const char* msg, ...); -void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, va_list args); +void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, + va_list args); void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...); +// Returns true if stdout appears to be a terminal that supports colored +// output, false otherwise. +bool IsColorTerminal(); + } // end namespace benchmark #endif // BENCHMARK_COLORPRINT_H_ diff --git a/utils/google-benchmark/src/commandlineflags.cc b/utils/google-benchmark/src/commandlineflags.cc index 3e9a37a71..72534e022 100644 --- a/utils/google-benchmark/src/commandlineflags.cc +++ b/utils/google-benchmark/src/commandlineflags.cc @@ -14,6 +14,7 @@ #include "commandlineflags.h" +#include #include #include #include @@ -43,7 +44,7 @@ bool ParseInt32(const std::string& src_text, const char* str, int32_t* value) { // The parsed value overflows as a long. (strtol() returns // LONG_MAX or LONG_MIN when the input overflows.) result != long_value - // The parsed value overflows as an Int32. + // The parsed value overflows as an Int32. ) { std::cerr << src_text << " is expected to be a 32-bit integer, " << "but actually has value \"" << str << "\", " @@ -74,17 +75,6 @@ bool ParseDouble(const std::string& src_text, const char* str, double* value) { return true; } -inline const char* GetEnv(const char* name) { -#if defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9) - // Environment variables which we programmatically clear will be set to the - // empty string rather than unset (nullptr). 
Handle that case. - const char* const env = getenv(name); - return (env != nullptr && env[0] != '\0') ? env : nullptr; -#else - return getenv(name); -#endif -} - // Returns the name of the environment variable corresponding to the // given flag. For example, FlagToEnvVar("foo") will return // "BENCHMARK_FOO" in the open-source version. @@ -104,8 +94,9 @@ static std::string FlagToEnvVar(const char* flag) { // The value is considered true iff it's not "0". bool BoolFromEnv(const char* flag, bool default_value) { const std::string env_var = FlagToEnvVar(flag); - const char* const string_value = GetEnv(env_var.c_str()); - return string_value == nullptr ? default_value : strcmp(string_value, "0") != 0; + const char* const string_value = getenv(env_var.c_str()); + return string_value == nullptr ? default_value + : strcmp(string_value, "0") != 0; } // Reads and returns a 32-bit integer stored in the environment @@ -113,7 +104,7 @@ bool BoolFromEnv(const char* flag, bool default_value) { // doesn't represent a valid 32-bit integer, returns default_value. int32_t Int32FromEnv(const char* flag, int32_t default_value) { const std::string env_var = FlagToEnvVar(flag); - const char* const string_value = GetEnv(env_var.c_str()); + const char* const string_value = getenv(env_var.c_str()); if (string_value == nullptr) { // The environment variable is not set. return default_value; @@ -133,7 +124,7 @@ int32_t Int32FromEnv(const char* flag, int32_t default_value) { // the given flag; if it's not set, returns default_value. const char* StringFromEnv(const char* flag, const char* default_value) { const std::string env_var = FlagToEnvVar(flag); - const char* const value = GetEnv(env_var.c_str()); + const char* const value = getenv(env_var.c_str()); return value == nullptr ? default_value : value; } @@ -175,7 +166,7 @@ bool ParseBoolFlag(const char* str, const char* flag, bool* value) { if (value_str == nullptr) return false; // Converts the string value to a bool. - *value = !(*value_str == '0' || *value_str == 'f' || *value_str == 'F'); + *value = IsTruthyFlagValue(value_str); return true; } @@ -217,4 +208,11 @@ bool ParseStringFlag(const char* str, const char* flag, std::string* value) { bool IsFlag(const char* str, const char* flag) { return (ParseFlagValue(str, flag, true) != nullptr); } + +bool IsTruthyFlagValue(const std::string& str) { + if (str.empty()) return true; + char ch = str[0]; + return isalnum(ch) && + !(ch == '0' || ch == 'f' || ch == 'F' || ch == 'n' || ch == 'N'); +} } // end namespace benchmark diff --git a/utils/google-benchmark/src/commandlineflags.h b/utils/google-benchmark/src/commandlineflags.h index 34b9c6f30..945c9a9fc 100644 --- a/utils/google-benchmark/src/commandlineflags.h +++ b/utils/google-benchmark/src/commandlineflags.h @@ -38,8 +38,7 @@ const char* StringFromEnv(const char* flag, const char* default_val); // Parses a string for a bool flag, in the form of either // "--flag=value" or "--flag". // -// In the former case, the value is taken as true as long as it does -// not start with '0', 'f', or 'F'. +// In the former case, the value is taken as true if it passes IsTruthyValue(). // // In the latter case, the value is taken as true. // @@ -71,6 +70,10 @@ bool ParseStringFlag(const char* str, const char* flag, std::string* value); // Returns true if the string matches the flag. bool IsFlag(const char* str, const char* flag); +// Returns true unless value starts with one of: '0', 'f', 'F', 'n' or 'N', or +// some non-alphanumeric character. 
As a special case, also returns true if +// value is the empty string. +bool IsTruthyFlagValue(const std::string& value); } // end namespace benchmark #endif // BENCHMARK_COMMANDLINEFLAGS_H_ diff --git a/utils/google-benchmark/src/complexity.cc b/utils/google-benchmark/src/complexity.cc index 7d5579430..dfab791a3 100644 --- a/utils/google-benchmark/src/complexity.cc +++ b/utils/google-benchmark/src/complexity.cc @@ -119,8 +119,7 @@ LeastSq MinimalLeastSq(const std::vector& n, // this one. If it is oAuto, it will be calculated the best // fitting curve. LeastSq MinimalLeastSq(const std::vector& n, - const std::vector& time, - const BigO complexity) { + const std::vector& time, const BigO complexity) { CHECK_EQ(n.size(), time.size()); CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two // benchmark runs are given @@ -196,6 +195,7 @@ std::vector ComputeStats( cpu_accumulated_time_stat.Mean() * run_iterations; mean_data.bytes_per_second = bytes_per_second_stat.Mean(); mean_data.items_per_second = items_per_second_stat.Mean(); + mean_data.time_unit = reports[0].time_unit; // Only add label to mean/stddev if it is same for all runs mean_data.report_label = reports[0].report_label; @@ -214,6 +214,7 @@ std::vector ComputeStats( stddev_data.cpu_accumulated_time = cpu_accumulated_time_stat.StdDev(); stddev_data.bytes_per_second = bytes_per_second_stat.StdDev(); stddev_data.items_per_second = items_per_second_stat.StdDev(); + stddev_data.time_unit = reports[0].time_unit; results.push_back(mean_data); results.push_back(stddev_data); diff --git a/utils/google-benchmark/src/complexity.h b/utils/google-benchmark/src/complexity.h index 85cc12507..23cd9bbc8 100644 --- a/utils/google-benchmark/src/complexity.h +++ b/utils/google-benchmark/src/complexity.h @@ -47,10 +47,7 @@ std::vector ComputeBigO( // parameter will return the best fitting curve detected. struct LeastSq { - LeastSq() : - coef(0.0), - rms(0.0), - complexity(oNone) {} + LeastSq() : coef(0.0), rms(0.0), complexity(oNone) {} double coef; double rms; @@ -60,5 +57,5 @@ struct LeastSq { // Function to return an string for the calculated complexity std::string GetBigOString(BigO complexity); -} // end namespace benchmark -#endif // COMPLEXITY_H_ +} // end namespace benchmark +#endif // COMPLEXITY_H_ diff --git a/utils/google-benchmark/src/console_reporter.cc b/utils/google-benchmark/src/console_reporter.cc index a663b9367..7e0cca3e3 100644 --- a/utils/google-benchmark/src/console_reporter.cc +++ b/utils/google-benchmark/src/console_reporter.cc @@ -28,7 +28,7 @@ #include "commandlineflags.h" #include "internal_macros.h" #include "string_util.h" -#include "walltime.h" +#include "timers.h" namespace benchmark { @@ -39,46 +39,45 @@ bool ConsoleReporter::ReportContext(const Context& context) { #ifdef BENCHMARK_OS_WINDOWS if (color_output_ && &std::cout != &GetOutputStream()) { - GetErrorStream() << "Color printing is only supported for stdout on windows." - " Disabling color printing\n"; - color_output_ = false; + GetErrorStream() + << "Color printing is only supported for stdout on windows." 
+ " Disabling color printing\n"; + color_output_ = false; } #endif - std::string str = FormatString("%-*s %13s %13s %10s\n", - static_cast(name_field_width_), "Benchmark", - "Time", "CPU", "Iterations"); + std::string str = + FormatString("%-*s %13s %13s %10s\n", static_cast(name_field_width_), + "Benchmark", "Time", "CPU", "Iterations"); GetOutputStream() << str << std::string(str.length() - 1, '-') << "\n"; return true; } void ConsoleReporter::ReportRuns(const std::vector& reports) { - for (const auto& run : reports) - PrintRunData(run); + for (const auto& run : reports) PrintRunData(run); } -static void IgnoreColorPrint(std::ostream& out, LogColor, - const char* fmt, ...) -{ - va_list args; - va_start(args, fmt); - out << FormatString(fmt, args); - va_end(args); +static void IgnoreColorPrint(std::ostream& out, LogColor, const char* fmt, + ...) { + va_list args; + va_start(args, fmt); + out << FormatString(fmt, args); + va_end(args); } void ConsoleReporter::PrintRunData(const Run& result) { typedef void(PrinterFn)(std::ostream&, LogColor, const char*, ...); auto& Out = GetOutputStream(); - PrinterFn* printer = color_output_ ? (PrinterFn*)ColorPrintf - : IgnoreColorPrint; + PrinterFn* printer = + color_output_ ? (PrinterFn*)ColorPrintf : IgnoreColorPrint; auto name_color = (result.report_big_o || result.report_rms) ? COLOR_BLUE : COLOR_GREEN; printer(Out, name_color, "%-*s ", name_field_width_, - result.benchmark_name.c_str()); + result.benchmark_name.c_str()); if (result.error_occurred) { printer(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'", - result.error_message.c_str()); + result.error_message.c_str()); printer(Out, COLOR_DEFAULT, "\n"); return; } @@ -91,24 +90,24 @@ void ConsoleReporter::PrintRunData(const Run& result) { // Format items per second std::string items; if (result.items_per_second > 0) { - items = StrCat(" ", HumanReadableNumber(result.items_per_second), - " items/s"); - } + items = + StrCat(" ", HumanReadableNumber(result.items_per_second), " items/s"); + } const double real_time = result.GetAdjustedRealTime(); const double cpu_time = result.GetAdjustedCPUTime(); if (result.report_big_o) { std::string big_o = GetBigOString(result.complexity); - printer(Out, COLOR_YELLOW, "%10.2f %s %10.2f %s ", real_time, - big_o.c_str(), cpu_time, big_o.c_str()); + printer(Out, COLOR_YELLOW, "%10.2f %s %10.2f %s ", real_time, big_o.c_str(), + cpu_time, big_o.c_str()); } else if (result.report_rms) { printer(Out, COLOR_YELLOW, "%10.0f %% %10.0f %% ", real_time * 100, - cpu_time * 100); + cpu_time * 100); } else { const char* timeLabel = GetTimeUnitString(result.time_unit); printer(Out, COLOR_YELLOW, "%10.0f %s %10.0f %s ", real_time, timeLabel, - cpu_time, timeLabel); + cpu_time, timeLabel); } if (!result.report_big_o && !result.report_rms) { diff --git a/utils/google-benchmark/src/csv_reporter.cc b/utils/google-benchmark/src/csv_reporter.cc index 7bc7ef3db..18ab3b668 100644 --- a/utils/google-benchmark/src/csv_reporter.cc +++ b/utils/google-benchmark/src/csv_reporter.cc @@ -23,7 +23,7 @@ #include #include "string_util.h" -#include "walltime.h" +#include "timers.h" // File format reference: http://edoceo.com/utilitas/csv-file-format. 
@@ -31,38 +31,28 @@ namespace benchmark { namespace { std::vector elements = { - "name", - "iterations", - "real_time", - "cpu_time", - "time_unit", - "bytes_per_second", - "items_per_second", - "label", - "error_occurred", - "error_message" -}; + "name", "iterations", "real_time", "cpu_time", + "time_unit", "bytes_per_second", "items_per_second", "label", + "error_occurred", "error_message"}; } bool CSVReporter::ReportContext(const Context& context) { PrintBasicContext(&GetErrorStream(), context); std::ostream& Out = GetOutputStream(); - for (auto B = elements.begin(); B != elements.end(); ) { + for (auto B = elements.begin(); B != elements.end();) { Out << *B++; - if (B != elements.end()) - Out << ","; + if (B != elements.end()) Out << ","; } Out << "\n"; return true; } -void CSVReporter::ReportRuns(const std::vector & reports) { - for (const auto& run : reports) - PrintRunData(run); +void CSVReporter::ReportRuns(const std::vector& reports) { + for (const auto& run : reports) PrintRunData(run); } -void CSVReporter::PrintRunData(const Run & run) { +void CSVReporter::PrintRunData(const Run& run) { std::ostream& Out = GetOutputStream(); // Field with embedded double-quote characters must be doubled and the field diff --git a/utils/google-benchmark/src/internal_macros.h b/utils/google-benchmark/src/internal_macros.h index 1080ac943..e8efcbb39 100644 --- a/utils/google-benchmark/src/internal_macros.h +++ b/utils/google-benchmark/src/internal_macros.h @@ -4,37 +4,39 @@ #include "benchmark/macros.h" #ifndef __has_feature -# define __has_feature(x) 0 -#endif - -#if __has_feature(cxx_attributes) -# define BENCHMARK_NORETURN [[noreturn]] -#elif defined(__GNUC__) -# define BENCHMARK_NORETURN __attribute__((noreturn)) -#else -# define BENCHMARK_NORETURN -#endif - -#if defined(__CYGWIN__) -# define BENCHMARK_OS_CYGWIN 1 -#elif defined(_WIN32) -# define BENCHMARK_OS_WINDOWS 1 -#elif defined(__APPLE__) -// TODO(ericwf) This doesn't actually check that it is a Mac OSX system. Just -// that it is an apple system. -# define BENCHMARK_OS_MACOSX 1 -#elif defined(__FreeBSD__) -# define BENCHMARK_OS_FREEBSD 1 -#elif defined(__linux__) -# define BENCHMARK_OS_LINUX 1 +#define __has_feature(x) 0 #endif #if defined(__clang__) -# define COMPILER_CLANG +#define COMPILER_CLANG #elif defined(_MSC_VER) -# define COMPILER_MSVC +#define COMPILER_MSVC #elif defined(__GNUC__) -# define COMPILER_GCC +#define COMPILER_GCC #endif -#endif // BENCHMARK_INTERNAL_MACROS_H_ +#if __has_feature(cxx_attributes) +#define BENCHMARK_NORETURN [[noreturn]] +#elif defined(__GNUC__) +#define BENCHMARK_NORETURN __attribute__((noreturn)) +#elif defined(COMPILER_MSVC) +#define BENCHMARK_NORETURN __declspec(noreturn) +#else +#define BENCHMARK_NORETURN +#endif + +#if defined(__CYGWIN__) +#define BENCHMARK_OS_CYGWIN 1 +#elif defined(_WIN32) +#define BENCHMARK_OS_WINDOWS 1 +#elif defined(__APPLE__) +// TODO(ericwf) This doesn't actually check that it is a Mac OSX system. Just +// that it is an apple system. 
+#define BENCHMARK_OS_MACOSX 1 +#elif defined(__FreeBSD__) +#define BENCHMARK_OS_FREEBSD 1 +#elif defined(__linux__) +#define BENCHMARK_OS_LINUX 1 +#endif + +#endif // BENCHMARK_INTERNAL_MACROS_H_ diff --git a/utils/google-benchmark/src/json_reporter.cc b/utils/google-benchmark/src/json_reporter.cc index 485d30524..cea5f9bfa 100644 --- a/utils/google-benchmark/src/json_reporter.cc +++ b/utils/google-benchmark/src/json_reporter.cc @@ -23,7 +23,7 @@ #include #include "string_util.h" -#include "walltime.h" +#include "timers.h" namespace benchmark { @@ -47,11 +47,13 @@ std::string FormatKV(std::string const& key, int64_t value) { return ss.str(); } -int64_t RoundDouble(double v) { - return static_cast(v + 0.5); +std::string FormatKV(std::string const& key, double value) { + return StringPrintF("\"%s\": %.2f", key.c_str(), value); } -} // end namespace +int64_t RoundDouble(double v) { return static_cast(v + 0.5); } + +} // end namespace bool JSONReporter::ReportContext(const Context& context) { std::ostream& out = GetOutputStream(); @@ -66,14 +68,11 @@ bool JSONReporter::ReportContext(const Context& context) { std::string walltime_value = LocalDateTimeString(); out << indent << FormatKV("date", walltime_value) << ",\n"; - out << indent - << FormatKV("num_cpus", static_cast(context.num_cpus)) + out << indent << FormatKV("num_cpus", static_cast(context.num_cpus)) << ",\n"; - out << indent - << FormatKV("mhz_per_cpu", RoundDouble(context.mhz_per_cpu)) + out << indent << FormatKV("mhz_per_cpu", RoundDouble(context.mhz_per_cpu)) << ",\n"; - out << indent - << FormatKV("cpu_scaling_enabled", context.cpu_scaling_enabled) + out << indent << FormatKV("cpu_scaling_enabled", context.cpu_scaling_enabled) << ",\n"; #if defined(NDEBUG) @@ -118,28 +117,20 @@ void JSONReporter::Finalize() { void JSONReporter::PrintRunData(Run const& run) { std::string indent(6, ' '); std::ostream& out = GetOutputStream(); - out << indent - << FormatKV("name", run.benchmark_name) - << ",\n"; - if (run.error_occurred) { - out << indent - << FormatKV("error_occurred", run.error_occurred) - << ",\n"; - out << indent - << FormatKV("error_message", run.error_message) - << ",\n"; - } + out << indent << FormatKV("name", run.benchmark_name) << ",\n"; + if (run.error_occurred) { + out << indent << FormatKV("error_occurred", run.error_occurred) << ",\n"; + out << indent << FormatKV("error_message", run.error_message) << ",\n"; + } if (!run.report_big_o && !run.report_rms) { - out << indent - << FormatKV("iterations", run.iterations) - << ",\n"; - out << indent - << FormatKV("real_time", RoundDouble(run.GetAdjustedRealTime())) - << ",\n"; - out << indent - << FormatKV("cpu_time", RoundDouble(run.GetAdjustedCPUTime())); - out << ",\n" << indent - << FormatKV("time_unit", GetTimeUnitString(run.time_unit)); + out << indent << FormatKV("iterations", run.iterations) << ",\n"; + out << indent + << FormatKV("real_time", RoundDouble(run.GetAdjustedRealTime())) + << ",\n"; + out << indent + << FormatKV("cpu_time", RoundDouble(run.GetAdjustedCPUTime())); + out << ",\n" + << indent << FormatKV("time_unit", GetTimeUnitString(run.time_unit)); } else if (run.report_big_o) { out << indent << FormatKV("cpu_coefficient", RoundDouble(run.GetAdjustedCPUTime())) @@ -147,15 +138,11 @@ void JSONReporter::PrintRunData(Run const& run) { out << indent << FormatKV("real_coefficient", RoundDouble(run.GetAdjustedRealTime())) << ",\n"; + out << indent << FormatKV("big_o", GetBigOString(run.complexity)) << ",\n"; + out << indent << FormatKV("time_unit", 
GetTimeUnitString(run.time_unit)); + } else if (run.report_rms) { out << indent - << FormatKV("big_o", GetBigOString(run.complexity)) - << ",\n"; - out << indent - << FormatKV("time_unit", GetTimeUnitString(run.time_unit)); - } else if(run.report_rms) { - out << indent - << FormatKV("rms", RoundDouble(run.GetAdjustedCPUTime()*100)) - << '%'; + << FormatKV("rms", run.GetAdjustedCPUTime()); } if (run.bytes_per_second > 0.0) { out << ",\n" @@ -168,9 +155,7 @@ void JSONReporter::PrintRunData(Run const& run) { << FormatKV("items_per_second", RoundDouble(run.items_per_second)); } if (!run.report_label.empty()) { - out << ",\n" - << indent - << FormatKV("label", run.report_label); + out << ",\n" << indent << FormatKV("label", run.report_label); } out << '\n'; } diff --git a/utils/google-benchmark/src/log.cc b/utils/google-benchmark/src/log.cc deleted file mode 100644 index b660309d3..000000000 --- a/utils/google-benchmark/src/log.cc +++ /dev/null @@ -1,40 +0,0 @@ -#include "log.h" - -#include - -namespace benchmark { -namespace internal { - -int& LoggingLevelImp() { - static int level = 0; - return level; -} - -void SetLogLevel(int value) { - LoggingLevelImp() = value; -} - -int GetLogLevel() { - return LoggingLevelImp(); -} - -class NullLogBuffer : public std::streambuf -{ -public: - int overflow(int c) { - return c; - } -}; - -std::ostream& GetNullLogInstance() { - static NullLogBuffer log_buff; - static std::ostream null_log(&log_buff); - return null_log; -} - -std::ostream& GetErrorLogInstance() { - return std::clog; -} - -} // end namespace internal -} // end namespace benchmark \ No newline at end of file diff --git a/utils/google-benchmark/src/log.h b/utils/google-benchmark/src/log.h index 3777810e1..978cb0b4c 100644 --- a/utils/google-benchmark/src/log.h +++ b/utils/google-benchmark/src/log.h @@ -1,28 +1,73 @@ #ifndef BENCHMARK_LOG_H_ #define BENCHMARK_LOG_H_ +#include #include +#include "benchmark/macros.h" + namespace benchmark { namespace internal { -int GetLogLevel(); -void SetLogLevel(int level); +typedef std::basic_ostream&(EndLType)(std::basic_ostream&); -std::ostream& GetNullLogInstance(); -std::ostream& GetErrorLogInstance(); +class LogType { + friend LogType& GetNullLogInstance(); + friend LogType& GetErrorLogInstance(); -inline std::ostream& GetLogInstanceForLevel(int level) { - if (level <= GetLogLevel()) { + // FIXME: Add locking to output. 
+ template + friend LogType& operator<<(LogType&, Tp const&); + friend LogType& operator<<(LogType&, EndLType*); + + private: + LogType(std::ostream* out) : out_(out) {} + std::ostream* out_; + BENCHMARK_DISALLOW_COPY_AND_ASSIGN(LogType); +}; + +template +LogType& operator<<(LogType& log, Tp const& value) { + if (log.out_) { + *log.out_ << value; + } + return log; +} + +inline LogType& operator<<(LogType& log, EndLType* m) { + if (log.out_) { + *log.out_ << m; + } + return log; +} + +inline int& LogLevel() { + static int log_level = 0; + return log_level; +} + +inline LogType& GetNullLogInstance() { + static LogType log(nullptr); + return log; +} + +inline LogType& GetErrorLogInstance() { + static LogType log(&std::clog); + return log; +} + +inline LogType& GetLogInstanceForLevel(int level) { + if (level <= LogLevel()) { return GetErrorLogInstance(); } return GetNullLogInstance(); } -} // end namespace internal -} // end namespace benchmark +} // end namespace internal +} // end namespace benchmark -#define VLOG(x) (::benchmark::internal::GetLogInstanceForLevel(x) \ - << "-- LOG(" << x << "): ") +#define VLOG(x) \ + (::benchmark::internal::GetLogInstanceForLevel(x) << "-- LOG(" << x << "):" \ + " ") #endif \ No newline at end of file diff --git a/utils/google-benchmark/src/mutex.h b/utils/google-benchmark/src/mutex.h index f37ec35b3..5f461d05a 100644 --- a/utils/google-benchmark/src/mutex.h +++ b/utils/google-benchmark/src/mutex.h @@ -1,28 +1,26 @@ #ifndef BENCHMARK_MUTEX_H_ #define BENCHMARK_MUTEX_H_ -#include #include +#include + +#include "check.h" // Enable thread safety attributes only with clang. // The attributes can be safely erased when compiling with other compilers. #if defined(HAVE_THREAD_SAFETY_ATTRIBUTES) -#define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x)) +#define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x)) #else -#define THREAD_ANNOTATION_ATTRIBUTE__(x) // no-op +#define THREAD_ANNOTATION_ATTRIBUTE__(x) // no-op #endif -#define CAPABILITY(x) \ - THREAD_ANNOTATION_ATTRIBUTE__(capability(x)) +#define CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(capability(x)) -#define SCOPED_CAPABILITY \ - THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable) +#define SCOPED_CAPABILITY THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable) -#define GUARDED_BY(x) \ - THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x)) +#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x)) -#define PT_GUARDED_BY(x) \ - THREAD_ANNOTATION_ATTRIBUTE__(pt_guarded_by(x)) +#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(pt_guarded_by(x)) #define ACQUIRED_BEFORE(...) \ THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(__VA_ARGS__)) @@ -54,22 +52,18 @@ #define TRY_ACQUIRE_SHARED(...) \ THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_shared_capability(__VA_ARGS__)) -#define EXCLUDES(...) \ - THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(__VA_ARGS__)) +#define EXCLUDES(...) 
THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(__VA_ARGS__)) -#define ASSERT_CAPABILITY(x) \ - THREAD_ANNOTATION_ATTRIBUTE__(assert_capability(x)) +#define ASSERT_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(assert_capability(x)) #define ASSERT_SHARED_CAPABILITY(x) \ THREAD_ANNOTATION_ATTRIBUTE__(assert_shared_capability(x)) -#define RETURN_CAPABILITY(x) \ - THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x)) +#define RETURN_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x)) #define NO_THREAD_SAFETY_ANALYSIS \ THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis) - namespace benchmark { typedef std::condition_variable Condition; @@ -78,65 +72,84 @@ typedef std::condition_variable Condition; // we can annotate them with thread safety attributes and use the // -Wthread-safety warning with clang. The standard library types cannot be // used directly because they do not provide the required annotations. -class CAPABILITY("mutex") Mutex -{ -public: +class CAPABILITY("mutex") Mutex { + public: Mutex() {} void lock() ACQUIRE() { mut_.lock(); } void unlock() RELEASE() { mut_.unlock(); } - std::mutex& native_handle() { - return mut_; - } -private: + std::mutex& native_handle() { return mut_; } + + private: std::mutex mut_; }; - -class SCOPED_CAPABILITY MutexLock -{ +class SCOPED_CAPABILITY MutexLock { typedef std::unique_lock MutexLockImp; -public: - MutexLock(Mutex& m) ACQUIRE(m) : ml_(m.native_handle()) - { } + + public: + MutexLock(Mutex& m) ACQUIRE(m) : ml_(m.native_handle()) {} ~MutexLock() RELEASE() {} MutexLockImp& native_handle() { return ml_; } -private: + + private: MutexLockImp ml_; }; +class Barrier { + public: + Barrier(int num_threads) : running_threads_(num_threads) {} -class Notification -{ -public: - Notification() : notified_yet_(false) { } - - void WaitForNotification() const EXCLUDES(mutex_) { - MutexLock m_lock(mutex_); - auto notified_fn = [this]() REQUIRES(mutex_) { - return this->HasBeenNotified(); - }; - cv_.wait(m_lock.native_handle(), notified_fn); - } - - void Notify() EXCLUDES(mutex_) { + // Called by each thread + bool wait() EXCLUDES(lock_) { + bool last_thread = false; { - MutexLock lock(mutex_); - notified_yet_ = 1; + MutexLock ml(lock_); + last_thread = createBarrier(ml); } - cv_.notify_all(); + if (last_thread) phase_condition_.notify_all(); + return last_thread; } -private: - bool HasBeenNotified() const REQUIRES(mutex_) { - return notified_yet_; + void removeThread() EXCLUDES(lock_) { + MutexLock ml(lock_); + --running_threads_; + if (entered_ != 0) phase_condition_.notify_all(); } - mutable Mutex mutex_; - mutable std::condition_variable cv_; - bool notified_yet_ GUARDED_BY(mutex_); + private: + Mutex lock_; + Condition phase_condition_; + int running_threads_; + + // State for barrier management + int phase_number_ = 0; + int entered_ = 0; // Number of threads that have entered this barrier + + // Enter the barrier and wait until all other threads have also + // entered the barrier. Returns true iff this is the last thread to + // enter the barrier.
+ bool createBarrier(MutexLock& ml) REQUIRES(lock_) { + CHECK_LT(entered_, running_threads_); + entered_++; + if (entered_ < running_threads_) { + // Wait for all threads to enter + int phase_number_cp = phase_number_; + auto cb = [this, phase_number_cp]() { + return this->phase_number_ > phase_number_cp || + entered_ == running_threads_; // A thread has aborted in error + }; + phase_condition_.wait(ml.native_handle(), cb); + if (phase_number_ > phase_number_cp) return false; + // else (running_threads_ == entered_) and we are the last thread. + } + // Last thread has reached the barrier + phase_number_++; + entered_ = 0; + return true; + } }; -} // end namespace benchmark +} // end namespace benchmark -#endif // BENCHMARK_MUTEX_H_ +#endif // BENCHMARK_MUTEX_H_ diff --git a/utils/google-benchmark/src/re.h b/utils/google-benchmark/src/re.h index af57a39cf..af4a498c9 100644 --- a/utils/google-benchmark/src/re.h +++ b/utils/google-benchmark/src/re.h @@ -26,13 +26,16 @@ #endif #include +#include "check.h" + namespace benchmark { // A wrapper around the POSIX regular expression API that provides automatic // cleanup class Regex { public: - Regex(); + Regex() : init_(false) {} + ~Regex(); // Compile a regular expression matcher from spec. Returns true on success. @@ -43,18 +46,81 @@ class Regex { // Returns whether str matches the compiled regular expression. bool Match(const std::string& str); + private: bool init_; - // Underlying regular expression object +// Underlying regular expression object #if defined(HAVE_STD_REGEX) std::regex re_; #elif defined(HAVE_POSIX_REGEX) || defined(HAVE_GNU_POSIX_REGEX) regex_t re_; #else -# error No regular expression backend implementation available +#error No regular expression backend implementation available #endif }; +#if defined(HAVE_STD_REGEX) + +inline bool Regex::Init(const std::string& spec, std::string* error) { + try { + re_ = std::regex(spec, std::regex_constants::extended); + + init_ = true; + } catch (const std::regex_error& e) { + if (error) { + *error = e.what(); + } + } + return init_; +} + +inline Regex::~Regex() {} + +inline bool Regex::Match(const std::string& str) { + if (!init_) { + return false; + } + return std::regex_search(str, re_); +} + +#else +inline bool Regex::Init(const std::string& spec, std::string* error) { + int ec = regcomp(&re_, spec.c_str(), REG_EXTENDED | REG_NOSUB); + if (ec != 0) { + if (error) { + size_t needed = regerror(ec, &re_, nullptr, 0); + char* errbuf = new char[needed]; + regerror(ec, &re_, errbuf, needed); + + // regerror returns the number of bytes necessary to null terminate + // the string, so we move that when assigning to error. + CHECK_NE(needed, 0); + error->assign(errbuf, needed - 1); + + delete[] errbuf; + } + + return false; + } + + init_ = true; + return true; +} + +inline Regex::~Regex() { + if (init_) { + regfree(&re_); + } +} + +inline bool Regex::Match(const std::string& str) { + if (!init_) { + return false; + } + return regexec(&re_, str.c_str(), 0, nullptr, 0) == 0; +} +#endif + } // end namespace benchmark #endif // BENCHMARK_RE_H_ diff --git a/utils/google-benchmark/src/re_posix.cc b/utils/google-benchmark/src/re_posix.cc deleted file mode 100644 index 95b086ffd..000000000 --- a/utils/google-benchmark/src/re_posix.cc +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "check.h" -#include "re.h" - -namespace benchmark { - -Regex::Regex() : init_(false) { } - -bool Regex::Init(const std::string& spec, std::string* error) { - int ec = regcomp(&re_, spec.c_str(), REG_EXTENDED | REG_NOSUB); - if (ec != 0) { - if (error) { - size_t needed = regerror(ec, &re_, nullptr, 0); - char* errbuf = new char[needed]; - regerror(ec, &re_, errbuf, needed); - - // regerror returns the number of bytes necessary to null terminate - // the string, so we move that when assigning to error. - CHECK_NE(needed, 0); - error->assign(errbuf, needed - 1); - - delete[] errbuf; - } - - return false; - } - - init_ = true; - return true; -} - -Regex::~Regex() { - if (init_) { - regfree(&re_); - } -} - -bool Regex::Match(const std::string& str) { - if (!init_) { - return false; - } - - return regexec(&re_, str.c_str(), 0, nullptr, 0) == 0; -} - -} // end namespace benchmark diff --git a/utils/google-benchmark/src/re_std.cc b/utils/google-benchmark/src/re_std.cc deleted file mode 100644 index cfd7a218a..000000000 --- a/utils/google-benchmark/src/re_std.cc +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "re.h" - -namespace benchmark { - -Regex::Regex() : init_(false) { } - -bool Regex::Init(const std::string& spec, std::string* error) { - try { - re_ = std::regex(spec, std::regex_constants::extended); - - init_ = true; - } catch (const std::regex_error& e) { - if (error) { - *error = e.what(); - } - } - return init_; -} - -Regex::~Regex() { } - -bool Regex::Match(const std::string& str) { - if (!init_) { - return false; - } - - return std::regex_search(str, re_); -} - -} // end namespace benchmark diff --git a/utils/google-benchmark/src/reporter.cc b/utils/google-benchmark/src/reporter.cc index 518785907..64742426c 100644 --- a/utils/google-benchmark/src/reporter.cc +++ b/utils/google-benchmark/src/reporter.cc @@ -13,13 +13,13 @@ // limitations under the License. 
#include "benchmark/reporter.h" -#include "walltime.h" +#include "timers.h" #include #include -#include #include +#include #include "check.h" #include "stat.h" @@ -27,49 +27,42 @@ namespace benchmark { BenchmarkReporter::BenchmarkReporter() - : output_stream_(&std::cout), error_stream_(&std::cerr) -{ -} + : output_stream_(&std::cout), error_stream_(&std::cerr) {} -BenchmarkReporter::~BenchmarkReporter() { -} +BenchmarkReporter::~BenchmarkReporter() {} void BenchmarkReporter::PrintBasicContext(std::ostream *out_ptr, Context const &context) { CHECK(out_ptr) << "cannot be null"; - auto& Out = *out_ptr; + auto &Out = *out_ptr; Out << "Run on (" << context.num_cpus << " X " << context.mhz_per_cpu - << " MHz CPU " << ((context.num_cpus > 1) ? "s" : "") << ")\n"; + << " MHz CPU " << ((context.num_cpus > 1) ? "s" : "") << ")\n"; Out << LocalDateTimeString() << "\n"; if (context.cpu_scaling_enabled) { Out << "***WARNING*** CPU scaling is enabled, the benchmark " - "real time measurements may be noisy and will incur extra " - "overhead.\n"; + "real time measurements may be noisy and will incur extra " + "overhead.\n"; } #ifndef NDEBUG Out << "***WARNING*** Library was built as DEBUG. Timings may be " - "affected.\n"; + "affected.\n"; #endif } double BenchmarkReporter::Run::GetAdjustedRealTime() const { double new_time = real_accumulated_time * GetTimeUnitMultiplier(time_unit); - if (iterations != 0) - new_time /= static_cast(iterations); + if (iterations != 0) new_time /= static_cast(iterations); return new_time; } double BenchmarkReporter::Run::GetAdjustedCPUTime() const { double new_time = cpu_accumulated_time * GetTimeUnitMultiplier(time_unit); - if (iterations != 0) - new_time /= static_cast(iterations); + if (iterations != 0) new_time /= static_cast(iterations); return new_time; } - - -} // end namespace benchmark +} // end namespace benchmark diff --git a/utils/google-benchmark/src/stat.h b/utils/google-benchmark/src/stat.h index c4ecfe8e0..136c3aa8d 100644 --- a/utils/google-benchmark/src/stat.h +++ b/utils/google-benchmark/src/stat.h @@ -6,7 +6,6 @@ #include #include - namespace benchmark { template @@ -136,7 +135,7 @@ class Stat1 { private: static_assert(std::is_integral::value && - !std::is_same::value, + !std::is_same::value, "NumType must be an integral type that is not bool."); // Let i be the index of the samples provided (using +=) // and weight[i],value[i] be the data of sample #i diff --git a/utils/google-benchmark/src/string_util.cc b/utils/google-benchmark/src/string_util.cc index 30d130500..4cefbfba6 100644 --- a/utils/google-benchmark/src/string_util.cc +++ b/utils/google-benchmark/src/string_util.cc @@ -1,11 +1,11 @@ #include "string_util.h" +#include #include #include -#include +#include #include #include -#include #include "arraysize.h" @@ -27,7 +27,7 @@ static_assert(arraysize(kSmallSIUnits) == arraysize(kBigSIUnits), static const int64_t kUnitsSize = arraysize(kBigSIUnits); -} // end anonymous namespace +} // end anonymous namespace void ToExponentAndMantissa(double val, double thresh, int precision, double one_k, std::string* mantissa, @@ -107,7 +107,7 @@ std::string ToBinaryStringFullySpecified(double value, double threshold, void AppendHumanReadable(int n, std::string* str) { std::stringstream ss; // Round down to the nearest SI prefix. 
- ss << "/" << ToBinaryStringFullySpecified(n, 1.0, 0); + ss << ToBinaryStringFullySpecified(n, 1.0, 0); *str += ss.str(); } @@ -118,8 +118,7 @@ std::string HumanReadableNumber(double n) { return ToBinaryStringFullySpecified(n, 1.1, 1); } -std::string StringPrintFImp(const char *msg, va_list args) -{ +std::string StringPrintFImp(const char* msg, va_list args) { // we might need a second shot at this, so pre-emptively make a copy va_list args_cp; va_copy(args_cp, args); @@ -128,14 +127,14 @@ std::string StringPrintFImp(const char *msg, va_list args) // allocation guess what the size might be std::array local_buff; std::size_t size = local_buff.size(); - // 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation in the android-ndk + // 2015-10-08: vsnprintf is used instead of std::vsnprintf due to a limitation + // in the android-ndk auto ret = vsnprintf(local_buff.data(), size, msg, args_cp); va_end(args_cp); // handle empty expansion - if (ret == 0) - return std::string{}; + if (ret == 0) return std::string{}; if (static_cast(ret) < size) return std::string(local_buff.data()); @@ -143,13 +142,13 @@ std::string StringPrintFImp(const char *msg, va_list args) // add 1 to size to account for null-byte in size cast to prevent overflow size = static_cast(ret) + 1; auto buff_ptr = std::unique_ptr(new char[size]); - // 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation in the android-ndk + // 2015-10-08: vsnprintf is used instead of std::vsnprintf due to a limitation + // in the android-ndk ret = vsnprintf(buff_ptr.get(), size, msg, args); return std::string(buff_ptr.get()); } -std::string StringPrintF(const char* format, ...) -{ +std::string StringPrintF(const char* format, ...) { va_list args; va_start(args, format); std::string tmp = StringPrintFImp(format, args); @@ -160,10 +159,10 @@ std::string StringPrintF(const char* format, ...) void ReplaceAll(std::string* str, const std::string& from, const std::string& to) { std::size_t start = 0; - while((start = str->find(from, start)) != std::string::npos) { + while ((start = str->find(from, start)) != std::string::npos) { str->replace(start, from.length(), to); start += to.length(); } } -} // end namespace benchmark +} // end namespace benchmark diff --git a/utils/google-benchmark/src/string_util.h b/utils/google-benchmark/src/string_util.h index b89fef5ff..0b190b91a 100644 --- a/utils/google-benchmark/src/string_util.h +++ b/utils/google-benchmark/src/string_util.h @@ -1,8 +1,8 @@ #ifndef BENCHMARK_STRING_UTIL_H_ #define BENCHMARK_STRING_UTIL_H_ -#include #include +#include #include #include "internal_macros.h" @@ -14,23 +14,19 @@ std::string HumanReadableNumber(double n); std::string StringPrintF(const char* format, ...); -inline std::ostream& -StringCatImp(std::ostream& out) BENCHMARK_NOEXCEPT -{ +inline std::ostream& StringCatImp(std::ostream& out) BENCHMARK_NOEXCEPT { return out; } -template -inline std::ostream& -StringCatImp(std::ostream& out, First&& f, Rest&&... rest) -{ +template +inline std::ostream& StringCatImp(std::ostream& out, First&& f, + Rest&&... rest) { out << std::forward(f); return StringCatImp(out, std::forward(rest)...); } -template -inline std::string StrCat(Args&&... args) -{ +template +inline std::string StrCat(Args&&... args) { std::ostringstream ss; StringCatImp(ss, std::forward(args)...); return ss.str(); @@ -39,6 +35,6 @@ inline std::string StrCat(Args&&...
args) void ReplaceAll(std::string* str, const std::string& from, const std::string& to); -} // end namespace benchmark +} // end namespace benchmark -#endif // BENCHMARK_STRING_UTIL_H_ +#endif // BENCHMARK_STRING_UTIL_H_ diff --git a/utils/google-benchmark/src/sysinfo.cc b/utils/google-benchmark/src/sysinfo.cc index b5b9851f5..dd1e66306 100644 --- a/utils/google-benchmark/src/sysinfo.cc +++ b/utils/google-benchmark/src/sysinfo.cc @@ -17,13 +17,13 @@ #ifdef BENCHMARK_OS_WINDOWS #include -#include #include +#include #else #include #include -#include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD #include +#include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD #include #if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX #include @@ -31,8 +31,8 @@ #endif #include -#include #include +#include #include #include #include @@ -52,7 +52,6 @@ namespace { std::once_flag cpuinfo_init; double cpuinfo_cycles_per_second = 1.0; int cpuinfo_num_cpus = 1; // Conservative guess -std::mutex cputimens_mutex; #if !defined BENCHMARK_OS_MACOSX const int64_t estimate_time_ms = 1000; @@ -88,6 +87,22 @@ bool ReadIntFromFile(const char* file, long* value) { } #endif +#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN +static std::string convertToLowerCase(std::string s) { + for (auto& ch : s) + ch = std::tolower(ch); + return s; +} +static bool startsWithKey(std::string Value, std::string Key, + bool IgnoreCase = true) { + if (IgnoreCase) { + Key = convertToLowerCase(std::move(Key)); + Value = convertToLowerCase(std::move(Value)); + } + return Value.compare(0, Key.size(), Key) == 0; +} +#endif + void InitializeSystemInfo() { #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN char line[1024]; @@ -127,7 +142,8 @@ void InitializeSystemInfo() { if (fd == -1) { perror(pname); if (!saw_mhz) { - cpuinfo_cycles_per_second = static_cast(EstimateCyclesPerSecond()); + cpuinfo_cycles_per_second = + static_cast(EstimateCyclesPerSecond()); } return; } @@ -160,21 +176,21 @@ void InitializeSystemInfo() { // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only // accept postive values. Some environments (virtual machines) report zero, // which would cause infinite looping in WallTime_Init. - if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz") - 1) == 0) { + if (!saw_mhz && startsWithKey(line, "cpu MHz")) { const char* freqstr = strchr(line, ':'); if (freqstr) { cpuinfo_cycles_per_second = strtod(freqstr + 1, &err) * 1000000.0; if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0) saw_mhz = true; } - } else if (strncasecmp(line, "bogomips", sizeof("bogomips") - 1) == 0) { + } else if (startsWithKey(line, "bogomips")) { const char* freqstr = strchr(line, ':'); if (freqstr) { bogo_clock = strtod(freqstr + 1, &err) * 1000000.0; if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0) saw_bogo = true; } - } else if (strncmp(line, "processor", sizeof("processor") - 1) == 0) { + } else if (startsWithKey(line, "processor", /*IgnoreCase*/false)) { // The above comparison is case-sensitive because ARM kernels often // include a "Processor" line that tells you about the CPU, distinct // from the usual "processor" lines that give you CPU ids. No current @@ -197,7 +213,8 @@ void InitializeSystemInfo() { cpuinfo_cycles_per_second = bogo_clock; } else { // If we don't even have bogomips, we'll use the slow estimation. 
- cpuinfo_cycles_per_second = static_cast(EstimateCyclesPerSecond()); + cpuinfo_cycles_per_second = + static_cast(EstimateCyclesPerSecond()); } } if (num_cpus == 0) { @@ -239,7 +256,6 @@ void InitializeSystemInfo() { } // TODO: also figure out cpuinfo_num_cpus - #elif defined BENCHMARK_OS_WINDOWS // In NT, read MHz from the registry. If we fail to do so or we're in win9x // then make a crude estimate. @@ -249,141 +265,47 @@ void InitializeSystemInfo() { SHGetValueA(HKEY_LOCAL_MACHINE, "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", "~MHz", nullptr, &data, &data_size))) - cpuinfo_cycles_per_second = static_cast((int64_t)data * (int64_t)(1000 * 1000)); // was mhz + cpuinfo_cycles_per_second = + static_cast((int64_t)data * (int64_t)(1000 * 1000)); // was mhz else cpuinfo_cycles_per_second = static_cast(EstimateCyclesPerSecond()); SYSTEM_INFO sysinfo; - // Use memset as opposed to = {} to avoid GCC missing initializer false positives. + // Use memset as opposed to = {} to avoid GCC missing initializer false + // positives. std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO)); GetSystemInfo(&sysinfo); - cpuinfo_num_cpus = sysinfo.dwNumberOfProcessors; // number of logical processors in the current group + cpuinfo_num_cpus = sysinfo.dwNumberOfProcessors; // number of logical + // processors in the current + // group #elif defined BENCHMARK_OS_MACOSX - // returning "mach time units" per second. the current number of elapsed - // mach time units can be found by calling uint64 mach_absolute_time(); - // while not as precise as actual CPU cycles, it is accurate in the face - // of CPU frequency scaling and multi-cpu/core machines. - // Our mac users have these types of machines, and accuracy - // (i.e. correctness) trumps precision. - // See cycleclock.h: CycleClock::Now(), which returns number of mach time - // units on Mac OS X. 
- mach_timebase_info_data_t timebase_info; - mach_timebase_info(&timebase_info); - double mach_time_units_per_nanosecond = - static_cast(timebase_info.denom) / - static_cast(timebase_info.numer); - cpuinfo_cycles_per_second = mach_time_units_per_nanosecond * 1e9; - - int num_cpus = 0; + int32_t num_cpus = 0; size_t size = sizeof(num_cpus); - int numcpus_name[] = {CTL_HW, HW_NCPU}; - if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, nullptr, 0) == - 0 && - (size == sizeof(num_cpus))) + if (::sysctlbyname("hw.ncpu", &num_cpus, &size, nullptr, 0) == 0 && + (size == sizeof(num_cpus))) { cpuinfo_num_cpus = num_cpus; - + } else { + fprintf(stderr, "%s\n", strerror(errno)); + std::exit(EXIT_FAILURE); + } + int64_t cpu_freq = 0; + size = sizeof(cpu_freq); + if (::sysctlbyname("hw.cpufrequency", &cpu_freq, &size, nullptr, 0) == 0 && + (size == sizeof(cpu_freq))) { + cpuinfo_cycles_per_second = cpu_freq; + } else { + fprintf(stderr, "%s\n", strerror(errno)); + std::exit(EXIT_FAILURE); + } #else // Generic cycles per second counter cpuinfo_cycles_per_second = static_cast(EstimateCyclesPerSecond()); #endif } + } // end namespace -// getrusage() based implementation of MyCPUUsage -static double MyCPUUsageRUsage() { -#ifndef BENCHMARK_OS_WINDOWS - struct rusage ru; - if (getrusage(RUSAGE_SELF, &ru) == 0) { - return (static_cast(ru.ru_utime.tv_sec) + - static_cast(ru.ru_utime.tv_usec) * 1e-6 + - static_cast(ru.ru_stime.tv_sec) + - static_cast(ru.ru_stime.tv_usec) * 1e-6); - } else { - return 0.0; - } -#else - HANDLE proc = GetCurrentProcess(); - FILETIME creation_time; - FILETIME exit_time; - FILETIME kernel_time; - FILETIME user_time; - ULARGE_INTEGER kernel; - ULARGE_INTEGER user; - GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time, &user_time); - kernel.HighPart = kernel_time.dwHighDateTime; - kernel.LowPart = kernel_time.dwLowDateTime; - user.HighPart = user_time.dwHighDateTime; - user.LowPart = user_time.dwLowDateTime; - return (static_cast(kernel.QuadPart) + - static_cast(user.QuadPart)) * 1e-7; -#endif // OS_WINDOWS -} - -#ifndef BENCHMARK_OS_WINDOWS -static bool MyCPUUsageCPUTimeNsLocked(double* cputime) { - static int cputime_fd = -1; - if (cputime_fd == -1) { - cputime_fd = open("/proc/self/cputime_ns", O_RDONLY); - if (cputime_fd < 0) { - cputime_fd = -1; - return false; - } - } - char buff[64]; - memset(buff, 0, sizeof(buff)); - if (pread(cputime_fd, buff, sizeof(buff) - 1, 0) <= 0) { - close(cputime_fd); - cputime_fd = -1; - return false; - } - unsigned long long result = strtoull(buff, nullptr, 0); - if (result == (std::numeric_limits::max)()) { - close(cputime_fd); - cputime_fd = -1; - return false; - } - *cputime = static_cast(result) / 1e9; - return true; -} -#endif // OS_WINDOWS - -double MyCPUUsage() { -#ifndef BENCHMARK_OS_WINDOWS - { - std::lock_guard l(cputimens_mutex); - static bool use_cputime_ns = true; - if (use_cputime_ns) { - double value; - if (MyCPUUsageCPUTimeNsLocked(&value)) { - return value; - } - // Once MyCPUUsageCPUTimeNsLocked fails once fall back to getrusage(). - VLOG(1) << "Reading /proc/self/cputime_ns failed. 
Using getrusage().\n"; - use_cputime_ns = false; - } - } -#endif // OS_WINDOWS - return MyCPUUsageRUsage(); -} - -double ChildrenCPUUsage() { -#ifndef BENCHMARK_OS_WINDOWS - struct rusage ru; - if (getrusage(RUSAGE_CHILDREN, &ru) == 0) { - return (static_cast(ru.ru_utime.tv_sec) + - static_cast(ru.ru_utime.tv_usec) * 1e-6 + - static_cast(ru.ru_stime.tv_sec) + - static_cast(ru.ru_stime.tv_usec) * 1e-6); - } else { - return 0.0; - } -#else - // TODO: Not sure what this even means on Windows - return 0.0; -#endif // OS_WINDOWS -} - double CyclesPerSecond(void) { std::call_once(cpuinfo_init, InitializeSystemInfo); return cpuinfo_cycles_per_second; @@ -410,8 +332,8 @@ bool CpuScalingEnabled() { // local file system. If reading the exported files fails, then we may not be // running on Linux, so we silently ignore all the read errors. for (int cpu = 0, num_cpus = NumCPUs(); cpu < num_cpus; ++cpu) { - std::string governor_file = StrCat("/sys/devices/system/cpu/cpu", cpu, - "/cpufreq/scaling_governor"); + std::string governor_file = + StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor"); FILE* file = fopen(governor_file.c_str(), "r"); if (!file) break; char buff[16]; diff --git a/utils/google-benchmark/src/sysinfo.h b/utils/google-benchmark/src/sysinfo.h index eaf77e07c..c5d9916d2 100644 --- a/utils/google-benchmark/src/sysinfo.h +++ b/utils/google-benchmark/src/sysinfo.h @@ -2,8 +2,6 @@ #define BENCHMARK_SYSINFO_H_ namespace benchmark { -double MyCPUUsage(); -double ChildrenCPUUsage(); int NumCPUs(); double CyclesPerSecond(); bool CpuScalingEnabled(); diff --git a/utils/google-benchmark/src/timers.cc b/utils/google-benchmark/src/timers.cc new file mode 100644 index 000000000..fadc08f79 --- /dev/null +++ b/utils/google-benchmark/src/timers.cc @@ -0,0 +1,195 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "timers.h" +#include "internal_macros.h" + +#ifdef BENCHMARK_OS_WINDOWS +#include +#include +#include +#else +#include +#include +#include +#include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD +#include +#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX +#include +#endif +#if defined(BENCHMARK_OS_MACOSX) +#include +#include +#include +#endif +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "check.h" +#include "log.h" +#include "sleep.h" +#include "string_util.h" + +namespace benchmark { + +// Suppress unused warnings on helper functions. 
+#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wunused-function" +#endif + +namespace { +#if defined(BENCHMARK_OS_WINDOWS) +double MakeTime(FILETIME const& kernel_time, FILETIME const& user_time) { + ULARGE_INTEGER kernel; + ULARGE_INTEGER user; + kernel.HighPart = kernel_time.dwHighDateTime; + kernel.LowPart = kernel_time.dwLowDateTime; + user.HighPart = user_time.dwHighDateTime; + user.LowPart = user_time.dwLowDateTime; + return (static_cast(kernel.QuadPart) + + static_cast(user.QuadPart)) * + 1e-7; +} +#else +double MakeTime(struct rusage const& ru) { + return (static_cast(ru.ru_utime.tv_sec) + + static_cast(ru.ru_utime.tv_usec) * 1e-6 + + static_cast(ru.ru_stime.tv_sec) + + static_cast(ru.ru_stime.tv_usec) * 1e-6); +} +#endif +#if defined(BENCHMARK_OS_MACOSX) +double MakeTime(thread_basic_info_data_t const& info) { + return (static_cast(info.user_time.seconds) + + static_cast(info.user_time.microseconds) * 1e-6 + + static_cast(info.system_time.seconds) + + static_cast(info.system_time.microseconds) * 1e-6); +} +#endif +#if defined(CLOCK_PROCESS_CPUTIME_ID) || defined(CLOCK_THREAD_CPUTIME_ID) +double MakeTime(struct timespec const& ts) { + return ts.tv_sec + (static_cast(ts.tv_nsec) * 1e-9); +} +#endif + +BENCHMARK_NORETURN static void DiagnoseAndExit(const char* msg) { + std::cerr << "ERROR: " << msg << std::endl; + std::exit(EXIT_FAILURE); +} + +} // end namespace + +double ProcessCPUUsage() { +// FIXME We want to use clock_gettime, but it's not available in MacOS 10.11. See +// https://github.com/google/benchmark/pull/292 +#if defined(CLOCK_PROCESS_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX) + struct timespec spec; + if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0) + return MakeTime(spec); + DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed"); +#elif defined(BENCHMARK_OS_WINDOWS) + HANDLE proc = GetCurrentProcess(); + FILETIME creation_time; + FILETIME exit_time; + FILETIME kernel_time; + FILETIME user_time; + if (GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time, + &user_time)) + return MakeTime(kernel_time, user_time); + DiagnoseAndExit("GetProcessTimes() failed"); +#else + struct rusage ru; + if (getrusage(RUSAGE_SELF, &ru) == 0) return MakeTime(ru); + DiagnoseAndExit("getrusage(RUSAGE_SELF, ...) failed"); +#endif +} + +double ThreadCPUUsage() { +// FIXME We want to use clock_gettime, but it's not available in MacOS 10.11. See +// https://github.com/google/benchmark/pull/292 +#if defined(CLOCK_THREAD_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX) + struct timespec ts; + if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0) return MakeTime(ts); + DiagnoseAndExit("clock_gettime(CLOCK_THREAD_CPUTIME_ID, ...) failed"); +#elif defined(BENCHMARK_OS_WINDOWS) + HANDLE this_thread = GetCurrentThread(); + FILETIME creation_time; + FILETIME exit_time; + FILETIME kernel_time; + FILETIME user_time; + GetThreadTimes(this_thread, &creation_time, &exit_time, &kernel_time, + &user_time); + return MakeTime(kernel_time, user_time); +#elif defined(BENCHMARK_OS_MACOSX) + mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT; + thread_basic_info_data_t info; + mach_port_t thread = pthread_mach_thread_np(pthread_self()); + if (thread_info(thread, THREAD_BASIC_INFO, (thread_info_t)&info, &count) == + KERN_SUCCESS) { + return MakeTime(info); + } + DiagnoseAndExit("ThreadCPUUsage() failed when evaluating thread_info"); +#else +#error Per-thread timing is not available on your system.
+#endif +} + +namespace { + +std::string DateTimeString(bool local) { + typedef std::chrono::system_clock Clock; + std::time_t now = Clock::to_time_t(Clock::now()); + const std::size_t kStorageSize = 128; + char storage[kStorageSize]; + std::size_t written; + + if (local) { +#if defined(BENCHMARK_OS_WINDOWS) + written = + std::strftime(storage, sizeof(storage), "%x %X", ::localtime(&now)); +#else + std::tm timeinfo; + std::memset(&timeinfo, 0, sizeof(std::tm)); + ::localtime_r(&now, &timeinfo); + written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo); +#endif + } else { +#if defined(BENCHMARK_OS_WINDOWS) + written = std::strftime(storage, sizeof(storage), "%x %X", ::gmtime(&now)); +#else + std::tm timeinfo; + std::memset(&timeinfo, 0, sizeof(std::tm)); + ::gmtime_r(&now, &timeinfo); + written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo); +#endif + } + CHECK(written < kStorageSize); + ((void)written); // prevent unused variable in optimized mode. + return std::string(storage); +} + +} // end namespace + +std::string LocalDateTimeString() { return DateTimeString(true); } + +} // end namespace benchmark diff --git a/utils/google-benchmark/src/timers.h b/utils/google-benchmark/src/timers.h new file mode 100644 index 000000000..65606ccd9 --- /dev/null +++ b/utils/google-benchmark/src/timers.h @@ -0,0 +1,48 @@ +#ifndef BENCHMARK_TIMERS_H +#define BENCHMARK_TIMERS_H + +#include +#include + +namespace benchmark { + +// Return the CPU usage of the current process +double ProcessCPUUsage(); + +// Return the CPU usage of the children of the current process +double ChildrenCPUUsage(); + +// Return the CPU usage of the current thread +double ThreadCPUUsage(); + +#if defined(HAVE_STEADY_CLOCK) +template +struct ChooseSteadyClock { + typedef std::chrono::high_resolution_clock type; +}; + +template <> +struct ChooseSteadyClock { + typedef std::chrono::steady_clock type; +}; +#endif + +struct ChooseClockType { +#if defined(HAVE_STEADY_CLOCK) + typedef ChooseSteadyClock<>::type type; +#else + typedef std::chrono::high_resolution_clock type; +#endif +}; + +inline double ChronoClockNow() { + typedef ChooseClockType::type ClockType; + using FpSeconds = std::chrono::duration; + return FpSeconds(ClockType::now().time_since_epoch()).count(); +} + +std::string LocalDateTimeString(); + +} // end namespace benchmark + +#endif // BENCHMARK_TIMERS_H diff --git a/utils/google-benchmark/src/walltime.cc b/utils/google-benchmark/src/walltime.cc deleted file mode 100644 index 4bdbaa598..000000000 --- a/utils/google-benchmark/src/walltime.cc +++ /dev/null @@ -1,263 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "benchmark/macros.h" -#include "internal_macros.h" -#include "walltime.h" - -#if defined(BENCHMARK_OS_WINDOWS) -#include -#include // for timeval -#else -#include -#endif - -#include -#include -#include -#include - -#include -#include -#include - -#include "arraysize.h" -#include "check.h" -#include "cycleclock.h" -#include "log.h" -#include "sysinfo.h" - -namespace benchmark { -namespace walltime { - -namespace { - -#if defined(HAVE_STEADY_CLOCK) -template -struct ChooseSteadyClock { - typedef std::chrono::high_resolution_clock type; -}; - -template <> -struct ChooseSteadyClock { - typedef std::chrono::steady_clock type; -}; -#endif - -struct ChooseClockType { -#if defined(HAVE_STEADY_CLOCK) - typedef ChooseSteadyClock<>::type type; -#else - typedef std::chrono::high_resolution_clock type; -#endif -}; - -class WallTimeImp -{ -public: - WallTime Now(); - - static WallTimeImp& GetWallTimeImp() { - static WallTimeImp* imp = new WallTimeImp(); - return *imp; - } - -private: - WallTimeImp(); - // Helper routines to load/store a float from an AtomicWord. Required because - // g++ < 4.7 doesn't support std::atomic correctly. I cannot wait to - // get rid of this horror show. - void SetDrift(float f) { - int32_t w; - memcpy(&w, &f, sizeof(f)); - std::atomic_store(&drift_adjust_, w); - } - - float GetDrift() const { - float f; - int32_t w = std::atomic_load(&drift_adjust_); - memcpy(&f, &w, sizeof(f)); - return f; - } - - WallTime Slow() const { - struct timeval tv; -#if defined(BENCHMARK_OS_WINDOWS) - FILETIME file_time; - SYSTEMTIME system_time; - ULARGE_INTEGER ularge; - const unsigned __int64 epoch = 116444736000000000LL; - - GetSystemTime(&system_time); - SystemTimeToFileTime(&system_time, &file_time); - ularge.LowPart = file_time.dwLowDateTime; - ularge.HighPart = file_time.dwHighDateTime; - - tv.tv_sec = (long)((ularge.QuadPart - epoch) / (10L * 1000 * 1000)); - tv.tv_usec = (long)(system_time.wMilliseconds * 1000); -#else - gettimeofday(&tv, nullptr); -#endif - return tv.tv_sec + tv.tv_usec * 1e-6; - } - -private: - static_assert(sizeof(float) <= sizeof(int32_t), - "type sizes don't allow the drift_adjust hack"); - - WallTime base_walltime_; - int64_t base_cycletime_; - int64_t cycles_per_second_; - double seconds_per_cycle_; - uint32_t last_adjust_time_; - std::atomic drift_adjust_; - int64_t max_interval_cycles_; - - BENCHMARK_DISALLOW_COPY_AND_ASSIGN(WallTimeImp); -}; - - -WallTime WallTimeImp::Now() { - WallTime now = 0.0; - WallTime result = 0.0; - int64_t ct = 0; - uint32_t top_bits = 0; - do { - ct = cycleclock::Now(); - int64_t cycle_delta = ct - base_cycletime_; - result = base_walltime_ + cycle_delta * seconds_per_cycle_; - - top_bits = static_cast(uint64_t(ct) >> 32); - // Recompute drift no more often than every 2^32 cycles. - // I.e., @2GHz, ~ every two seconds - if (top_bits == last_adjust_time_) { // don't need to recompute drift - return result + GetDrift(); - } - - now = Slow(); - } while (cycleclock::Now() - ct > max_interval_cycles_); - // We are now sure that "now" and "result" were produced within - // kMaxErrorInterval of one another. 
- - SetDrift(static_cast(now - result)); - last_adjust_time_ = top_bits; - return now; -} - - -WallTimeImp::WallTimeImp() - : base_walltime_(0.0), base_cycletime_(0), - cycles_per_second_(0), seconds_per_cycle_(0.0), - last_adjust_time_(0), drift_adjust_(0), - max_interval_cycles_(0) { - const double kMaxErrorInterval = 100e-6; - cycles_per_second_ = static_cast(CyclesPerSecond()); - CHECK(cycles_per_second_ != 0); - seconds_per_cycle_ = 1.0 / cycles_per_second_; - max_interval_cycles_ = - static_cast(cycles_per_second_ * kMaxErrorInterval); - do { - base_cycletime_ = cycleclock::Now(); - base_walltime_ = Slow(); - } while (cycleclock::Now() - base_cycletime_ > max_interval_cycles_); - // We are now sure that "base_walltime" and "base_cycletime" were produced - // within kMaxErrorInterval of one another. - - SetDrift(0.0); - last_adjust_time_ = static_cast(uint64_t(base_cycletime_) >> 32); -} - -WallTime CPUWalltimeNow() { - static WallTimeImp& imp = WallTimeImp::GetWallTimeImp(); - return imp.Now(); -} - -WallTime ChronoWalltimeNow() { - typedef ChooseClockType::type Clock; - typedef std::chrono::duration - FPSeconds; - static_assert(std::chrono::treat_as_floating_point::value, - "This type must be treated as a floating point type."); - auto now = Clock::now().time_since_epoch(); - return std::chrono::duration_cast(now).count(); -} - -bool UseCpuCycleClock() { - bool useWallTime = !CpuScalingEnabled(); - if (useWallTime) { - VLOG(1) << "Using the CPU cycle clock to provide walltime::Now().\n"; - } else { - VLOG(1) << "Using std::chrono to provide walltime::Now().\n"; - } - return useWallTime; -} - - -} // end anonymous namespace - -// WallTimeImp doesn't work when CPU Scaling is enabled. If CPU Scaling is -// enabled at the start of the program then std::chrono::system_clock is used -// instead. -WallTime Now() -{ - static bool useCPUClock = UseCpuCycleClock(); - if (useCPUClock) { - return CPUWalltimeNow(); - } else { - return ChronoWalltimeNow(); - } -} - -} // end namespace walltime - - -namespace { - -std::string DateTimeString(bool local) { - typedef std::chrono::system_clock Clock; - std::time_t now = Clock::to_time_t(Clock::now()); - char storage[128]; - std::size_t written; - - if (local) { -#if defined(BENCHMARK_OS_WINDOWS) - written = std::strftime(storage, sizeof(storage), "%x %X", ::localtime(&now)); -#else - std::tm timeinfo; - std::memset(&timeinfo, 0, sizeof(std::tm)); - ::localtime_r(&now, &timeinfo); - written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo); -#endif - } else { -#if defined(BENCHMARK_OS_WINDOWS) - written = std::strftime(storage, sizeof(storage), "%x %X", ::gmtime(&now)); -#else - std::tm timeinfo; - std::memset(&timeinfo, 0, sizeof(std::tm)); - ::gmtime_r(&now, &timeinfo); - written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo); -#endif - } - CHECK(written < arraysize(storage)); - ((void)written); // prevent unused variable in optimized mode. 
- return std::string(storage); -} - -} // end namespace - -std::string LocalDateTimeString() { - return DateTimeString(true); -} - -} // end namespace benchmark diff --git a/utils/google-benchmark/src/walltime.h b/utils/google-benchmark/src/walltime.h deleted file mode 100644 index 38c26f332..000000000 --- a/utils/google-benchmark/src/walltime.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef BENCHMARK_WALLTIME_H_ -#define BENCHMARK_WALLTIME_H_ - -#include - -namespace benchmark { -typedef double WallTime; - -namespace walltime { -WallTime Now(); -} // end namespace walltime - -std::string LocalDateTimeString(); - -} // end namespace benchmark - -#endif // BENCHMARK_WALLTIME_H_ diff --git a/utils/google-benchmark/test/CMakeLists.txt b/utils/google-benchmark/test/CMakeLists.txt index dd77744c0..87245984d 100644 --- a/utils/google-benchmark/test/CMakeLists.txt +++ b/utils/google-benchmark/test/CMakeLists.txt @@ -17,7 +17,7 @@ endmacro(compile_benchmark_test) macro(compile_output_test name) - add_executable(${name} "${name}.cc") + add_executable(${name} "${name}.cc" output_test.h) target_link_libraries(${name} output_test_helper benchmark ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) endmacro(compile_output_test) @@ -85,8 +85,14 @@ if (BENCHMARK_HAS_CXX03_FLAG) add_test(cxx03 cxx03_test --benchmark_min_time=0.01) endif() +# Attempt to work around flaky test failures when running on Appveyor servers. +if (DEFINED ENV{APPVEYOR}) + set(COMPLEXITY_MIN_TIME "0.5") +else() + set(COMPLEXITY_MIN_TIME "0.01") +endif() compile_output_test(complexity_test) -add_test(complexity_benchmark complexity_test --benchmark_min_time=0.01) +add_test(complexity_benchmark complexity_test --benchmark_min_time=${COMPLEXITY_MIN_TIME}) # Add the coverage command(s) if(CMAKE_BUILD_TYPE) diff --git a/utils/google-benchmark/test/basic_test.cc b/utils/google-benchmark/test/basic_test.cc index 570ff2269..22de007cb 100644 --- a/utils/google-benchmark/test/basic_test.cc +++ b/utils/google-benchmark/test/basic_test.cc @@ -1,8 +1,7 @@ #include "benchmark/benchmark_api.h" -#define BASIC_BENCHMARK_TEST(x) \ - BENCHMARK(x)->Arg(8)->Arg(512)->Arg(8192) +#define BASIC_BENCHMARK_TEST(x) BENCHMARK(x)->Arg(8)->Arg(512)->Arg(8192) void BM_empty(benchmark::State& state) { while (state.KeepRunning()) { @@ -26,7 +25,7 @@ void BM_spin_pause_before(benchmark::State& state) { for (int i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } - while(state.KeepRunning()) { + while (state.KeepRunning()) { for (int i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } @@ -35,9 +34,8 @@ void BM_spin_pause_before(benchmark::State& state) { BASIC_BENCHMARK_TEST(BM_spin_pause_before); BASIC_BENCHMARK_TEST(BM_spin_pause_before)->ThreadPerCpu(); - void BM_spin_pause_during(benchmark::State& state) { - while(state.KeepRunning()) { + while (state.KeepRunning()) { state.PauseTiming(); for (int i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); @@ -52,7 +50,7 @@ BASIC_BENCHMARK_TEST(BM_spin_pause_during); BASIC_BENCHMARK_TEST(BM_spin_pause_during)->ThreadPerCpu(); void BM_pause_during(benchmark::State& state) { - while(state.KeepRunning()) { + while (state.KeepRunning()) { state.PauseTiming(); state.ResumeTiming(); } @@ -63,7 +61,7 @@ BENCHMARK(BM_pause_during)->UseRealTime(); BENCHMARK(BM_pause_during)->UseRealTime()->ThreadPerCpu(); void BM_spin_pause_after(benchmark::State& state) { - while(state.KeepRunning()) { + while (state.KeepRunning()) { for (int i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } @@ 
-75,12 +73,11 @@ void BM_spin_pause_after(benchmark::State& state) { BASIC_BENCHMARK_TEST(BM_spin_pause_after); BASIC_BENCHMARK_TEST(BM_spin_pause_after)->ThreadPerCpu(); - void BM_spin_pause_before_and_after(benchmark::State& state) { for (int i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } - while(state.KeepRunning()) { + while (state.KeepRunning()) { for (int i = 0; i < state.range(0); ++i) { benchmark::DoNotOptimize(i); } @@ -92,9 +89,9 @@ void BM_spin_pause_before_and_after(benchmark::State& state) { BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after); BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after)->ThreadPerCpu(); - void BM_empty_stop_start(benchmark::State& state) { - while (state.KeepRunning()) { } + while (state.KeepRunning()) { + } } BENCHMARK(BM_empty_stop_start); BENCHMARK(BM_empty_stop_start)->ThreadPerCpu(); diff --git a/utils/google-benchmark/test/benchmark_test.cc b/utils/google-benchmark/test/benchmark_test.cc index fe7d82c6a..d832f81ae 100644 --- a/utils/google-benchmark/test/benchmark_test.cc +++ b/utils/google-benchmark/test/benchmark_test.cc @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -13,15 +14,14 @@ #include #include #include -#include -#include #include #include +#include #if defined(__GNUC__) -# define BENCHMARK_NOINLINE __attribute__((noinline)) +#define BENCHMARK_NOINLINE __attribute__((noinline)) #else -# define BENCHMARK_NOINLINE +#define BENCHMARK_NOINLINE #endif namespace { @@ -42,8 +42,7 @@ double CalculatePi(int depth) { std::set<int> ConstructRandomSet(int size) { std::set<int> s; - for (int i = 0; i < size; ++i) - s.insert(i); + for (int i = 0; i < size; ++i) s.insert(i); return s; } @@ -54,8 +53,7 @@ std::vector<int>* test_vector = nullptr; static void BM_Factorial(benchmark::State& state) { int fac_42 = 0; - while (state.KeepRunning()) - fac_42 = Factorial(8); + while (state.KeepRunning()) fac_42 = Factorial(8); // Prevent compiler optimizations std::stringstream ss; ss << fac_42; @@ -66,8 +64,7 @@ BENCHMARK(BM_Factorial)->UseRealTime(); static void BM_CalculatePiRange(benchmark::State& state) { double pi = 0.0; - while (state.KeepRunning()) - pi = CalculatePi(state.range(0)); + while (state.KeepRunning()) pi = CalculatePi(state.range(0)); std::stringstream ss; ss << pi; state.SetLabel(ss.str()); @@ -89,27 +86,27 @@ static void BM_SetInsert(benchmark::State& state) { state.PauseTiming(); std::set<int> data = ConstructRandomSet(state.range(0)); state.ResumeTiming(); - for (int j = 0; j < state.range(1); ++j) - data.insert(rand()); + for (int j = 0; j < state.range(1); ++j) data.insert(rand()); } state.SetItemsProcessed(state.iterations() * state.range(1)); state.SetBytesProcessed(state.iterations() * state.range(1) * sizeof(int)); } -BENCHMARK(BM_SetInsert)->Ranges({{1<<10,8<<10}, {1,10}}); +BENCHMARK(BM_SetInsert)->Ranges({{1 << 10, 8 << 10}, {1, 10}}); -template<class Container, class ValueType> +template <class Container, class ValueType> static void BM_Sequential(benchmark::State& state) { ValueType v = 42; while (state.KeepRunning()) { Container c; - for (int i = state.range(0); --i; ) - c.push_back(v); + for (int i = state.range(0); --i;) c.push_back(v); } const size_t items_processed = state.iterations() * state.range(0); state.SetItemsProcessed(items_processed); state.SetBytesProcessed(items_processed * sizeof(v)); } -BENCHMARK_TEMPLATE2(BM_Sequential, std::vector<int>, int)->Range(1 << 0, 1 << 10); +BENCHMARK_TEMPLATE2(BM_Sequential, std::vector<int>, int) + ->Range(1 << 0, 1 << 10); BENCHMARK_TEMPLATE(BM_Sequential, std::list<int>)->Range(1 << 0, 1 << 10); // Test the variadic version of
BENCHMARK_TEMPLATE in C++11 and beyond. #if __cplusplus >= 201103L @@ -119,10 +116,9 @@ BENCHMARK_TEMPLATE(BM_Sequential, std::vector<int>, int)->Arg(512); static void BM_StringCompare(benchmark::State& state) { std::string s1(state.range(0), '-'); std::string s2(state.range(0), '-'); - while (state.KeepRunning()) - benchmark::DoNotOptimize(s1.compare(s2)); + while (state.KeepRunning()) benchmark::DoNotOptimize(s1.compare(s2)); } -BENCHMARK(BM_StringCompare)->Range(1, 1<<20); +BENCHMARK(BM_StringCompare)->Range(1, 1 << 20); static void BM_SetupTeardown(benchmark::State& state) { if (state.thread_index == 0) { @@ -132,7 +128,7 @@ static void BM_SetupTeardown(benchmark::State& state) { int i = 0; while (state.KeepRunning()) { std::lock_guard<std::mutex> l(test_vector_mu); - if (i%2 == 0) + if (i % 2 == 0) test_vector->push_back(i); else test_vector->pop_back(); @@ -151,7 +147,7 @@ static void BM_LongTest(benchmark::State& state) { benchmark::DoNotOptimize(tracker += i); } } -BENCHMARK(BM_LongTest)->Range(1<<16,1<<28); +BENCHMARK(BM_LongTest)->Range(1 << 16, 1 << 28); static void BM_ParallelMemset(benchmark::State& state) { int size = state.range(0) / sizeof(int); @@ -180,20 +176,18 @@ BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4); static void BM_ManualTiming(benchmark::State& state) { size_t slept_for = 0; int microseconds = state.range(0); - std::chrono::duration<double, std::micro> sleep_duration { - static_cast<double>(microseconds) - }; + std::chrono::duration<double, std::micro> sleep_duration{ + static_cast<double>(microseconds)}; while (state.KeepRunning()) { - auto start = std::chrono::high_resolution_clock::now(); + auto start = std::chrono::high_resolution_clock::now(); // Simulate some useful workload with a sleep - std::this_thread::sleep_for(std::chrono::duration_cast< - std::chrono::nanoseconds>(sleep_duration)); - auto end = std::chrono::high_resolution_clock::now(); + std::this_thread::sleep_for( + std::chrono::duration_cast<std::chrono::nanoseconds>(sleep_duration)); + auto end = std::chrono::high_resolution_clock::now(); auto elapsed = - std::chrono::duration_cast<std::chrono::duration<double>>( - end - start); + std::chrono::duration_cast<std::chrono::duration<double>>(end - start); state.SetIterationTime(elapsed.count()); slept_for += microseconds; @@ -205,20 +199,43 @@ BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseManualTime(); #if __cplusplus >= 201103L -template <class ...Args> +template <class... Args> void BM_with_args(benchmark::State& state, Args&&...)
{ - while (state.KeepRunning()) {} + while (state.KeepRunning()) { + } } BENCHMARK_CAPTURE(BM_with_args, int_test, 42, 43, 44); -BENCHMARK_CAPTURE(BM_with_args, string_and_pair_test, - std::string("abc"), std::pair<int, double>(42, 3.8)); +BENCHMARK_CAPTURE(BM_with_args, string_and_pair_test, std::string("abc"), + std::pair<int, double>(42, 3.8)); void BM_non_template_args(benchmark::State& state, int, double) { - while(state.KeepRunning()) {} + while (state.KeepRunning()) { + } } BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0); -#endif // __cplusplus >= 201103L +#endif // __cplusplus >= 201103L + +static void BM_DenseThreadRanges(benchmark::State& st) { + switch (st.range(0)) { + case 1: + assert(st.threads == 1 || st.threads == 2 || st.threads == 3); + break; + case 2: + assert(st.threads == 1 || st.threads == 3 || st.threads == 4); + break; + case 3: + assert(st.threads == 5 || st.threads == 8 || st.threads == 11 || + st.threads == 14); + break; + default: + assert(false && "Invalid test case number"); + } + while (st.KeepRunning()) { + } +} +BENCHMARK(BM_DenseThreadRanges)->Arg(1)->DenseThreadRange(1, 3); +BENCHMARK(BM_DenseThreadRanges)->Arg(2)->DenseThreadRange(1, 4, 2); +BENCHMARK(BM_DenseThreadRanges)->Arg(3)->DenseThreadRange(5, 14, 3); BENCHMARK_MAIN() - diff --git a/utils/google-benchmark/test/complexity_test.cc b/utils/google-benchmark/test/complexity_test.cc index d2e5e194d..14e03b06e 100644 --- a/utils/google-benchmark/test/complexity_test.cc +++ b/utils/google-benchmark/test/complexity_test.cc @@ -1,47 +1,41 @@ #undef NDEBUG +#include +#include +#include +#include +#include #include "benchmark/benchmark.h" #include "output_test.h" -#include -#include -#include -#include -#include namespace { #define ADD_COMPLEXITY_CASES(...) \ - int CONCAT(dummy, __LINE__) = AddComplexityTest(__VA_ARGS__) + int CONCAT(dummy, __LINE__) = AddComplexityTest(__VA_ARGS__) -int AddComplexityTest(std::string big_o_test_name, - std::string rms_test_name, std::string big_o) { - SetSubstitutions({ - {"%bigo_name", big_o_test_name}, - {"%rms_name", rms_test_name}, - {"%bigo_str", "[ ]*" + std::string(dec_re) + " " + big_o}, - {"%bigo", big_o}, - {"%rms", "[ ]*[0-9]+ %"} - }); - AddCases(TC_ConsoleOut, { - {"^%bigo_name %bigo_str %bigo_str[ ]*$"}, - {"^%bigo_name", MR_Not}, // Assert we we didn't only matched a name. - {"^%rms_name %rms %rms[ ]*$", MR_Next} - }); - AddCases(TC_JSONOut, { - {"\"name\": \"%bigo_name\",$"}, - {"\"cpu_coefficient\": [0-9]+,$", MR_Next}, - {"\"real_coefficient\": [0-9]{1,5},$", MR_Next}, - {"\"big_o\": \"%bigo\",$", MR_Next}, - {"\"time_unit\": \"ns\"$", MR_Next}, - {"}", MR_Next}, - {"\"name\": \"%rms_name\",$"}, - {"\"rms\": [0-9]+%$", MR_Next}, - {"}", MR_Next} - }); - AddCases(TC_CSVOut, { - {"^\"%bigo_name\",,%float,%float,%bigo,,,,,$"}, - {"^\"%bigo_name\"", MR_Not}, - {"^\"%rms_name\",,%float,%float,,,,,,$", MR_Next} - }); +int AddComplexityTest(std::string big_o_test_name, std::string rms_test_name, + std::string big_o) { + SetSubstitutions({{"%bigo_name", big_o_test_name}, + {"%rms_name", rms_test_name}, + {"%bigo_str", "[ ]* %float " + big_o}, + {"%bigo", big_o}, + {"%rms", "[ ]*[0-9]+ %"}}); + AddCases( + TC_ConsoleOut, + {{"^%bigo_name %bigo_str %bigo_str[ ]*$"}, + {"^%bigo_name", MR_Not}, // Assert that we didn't only match a name.
+ {"^%rms_name %rms %rms[ ]*$", MR_Next}}); + AddCases(TC_JSONOut, {{"\"name\": \"%bigo_name\",$"}, + {"\"cpu_coefficient\": [0-9]+,$", MR_Next}, + {"\"real_coefficient\": [0-9]{1,5},$", MR_Next}, + {"\"big_o\": \"%bigo\",$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}, + {"\"name\": \"%rms_name\",$"}, + {"\"rms\": %float$", MR_Next}, + {"}", MR_Next}}); + AddCases(TC_CSVOut, {{"^\"%bigo_name\",,%float,%float,%bigo,,,,,$"}, + {"^\"%bigo_name\"", MR_Not}, + {"^\"%rms_name\",,%float,%float,,,,,,$", MR_Next}}); return 0; } @@ -53,23 +47,26 @@ int AddComplexityTest(std::string big_o_test_name, void BM_Complexity_O1(benchmark::State& state) { while (state.KeepRunning()) { - for (int i=0; i < 1024; ++i) { - benchmark::DoNotOptimize(&i); - } + for (int i = 0; i < 1024; ++i) { + benchmark::DoNotOptimize(&i); + } } state.SetComplexityN(state.range(0)); } -BENCHMARK(BM_Complexity_O1) -> Range(1, 1<<18) -> Complexity(benchmark::o1); -BENCHMARK(BM_Complexity_O1) -> Range(1, 1<<18) -> Complexity(); -BENCHMARK(BM_Complexity_O1) -> Range(1, 1<<18) -> Complexity([](int){return 1.0; }); +BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->Complexity(benchmark::o1); +BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->Complexity(); +BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->Complexity([](int) { + return 1.0; +}); -const char* big_o_1_test_name = "BM_Complexity_O1_BigO"; -const char* rms_o_1_test_name = "BM_Complexity_O1_RMS"; -const char* enum_big_o_1 = "\\([0-9]+\\)"; -// FIXME: Tolerate both '(1)' and 'lgN' as output when the complexity is auto deduced. +const char *big_o_1_test_name = "BM_Complexity_O1_BigO"; +const char *rms_o_1_test_name = "BM_Complexity_O1_RMS"; +const char *enum_big_o_1 = "\\([0-9]+\\)"; +// FIXME: Tolerate both '(1)' and 'lgN' as output when the complexity is auto +// deduced. 
// See https://github.com/google/benchmark/issues/272 -const char* auto_big_o_1 = "(\\([0-9]+\\))|(lgN)"; -const char* lambda_big_o_1 = "f\\(N\\)"; +const char *auto_big_o_1 = "(\\([0-9]+\\))|(lgN)"; +const char *lambda_big_o_1 = "f\\(N\\)"; // Add enum tests ADD_COMPLEXITY_CASES(big_o_1_test_name, rms_o_1_test_name, enum_big_o_1); @@ -95,20 +92,30 @@ std::vector<int> ConstructRandomVector(int size) { void BM_Complexity_O_N(benchmark::State& state) { auto v = ConstructRandomVector(state.range(0)); - const int item_not_in_vector = state.range(0)*2; // Test worst case scenario (item not in vector) + const int item_not_in_vector = + state.range(0) * 2; // Test worst case scenario (item not in vector) while (state.KeepRunning()) { - benchmark::DoNotOptimize(std::find(v.begin(), v.end(), item_not_in_vector)); + benchmark::DoNotOptimize(std::find(v.begin(), v.end(), item_not_in_vector)); } state.SetComplexityN(state.range(0)); } -BENCHMARK(BM_Complexity_O_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity(benchmark::oN); -BENCHMARK(BM_Complexity_O_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity([](int n) -> double{return n; }); -BENCHMARK(BM_Complexity_O_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity(); +BENCHMARK(BM_Complexity_O_N) + ->RangeMultiplier(2) + ->Range(1 << 10, 1 << 16) + ->Complexity(benchmark::oN); +BENCHMARK(BM_Complexity_O_N) + ->RangeMultiplier(2) + ->Range(1 << 10, 1 << 16) + ->Complexity([](int n) -> double { return n; }); +BENCHMARK(BM_Complexity_O_N) + ->RangeMultiplier(2) + ->Range(1 << 10, 1 << 16) + ->Complexity(); -const char* big_o_n_test_name = "BM_Complexity_O_N_BigO"; -const char* rms_o_n_test_name = "BM_Complexity_O_N_RMS"; -const char* enum_auto_big_o_n = "N"; -const char* lambda_big_o_n = "f\\(N\\)"; +const char *big_o_n_test_name = "BM_Complexity_O_N_BigO"; +const char *rms_o_n_test_name = "BM_Complexity_O_N_RMS"; +const char *enum_auto_big_o_n = "N"; +const char *lambda_big_o_n = "f\\(N\\)"; // Add enum tests ADD_COMPLEXITY_CASES(big_o_n_test_name, rms_o_n_test_name, enum_auto_big_o_n); @@ -123,31 +130,38 @@ ADD_COMPLEXITY_CASES(big_o_n_test_name, rms_o_n_test_name, lambda_big_o_n); static void BM_Complexity_O_N_log_N(benchmark::State& state) { auto v = ConstructRandomVector(state.range(0)); while (state.KeepRunning()) { - std::sort(v.begin(), v.end()); + std::sort(v.begin(), v.end()); } state.SetComplexityN(state.range(0)); } -BENCHMARK(BM_Complexity_O_N_log_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity(benchmark::oNLogN); -BENCHMARK(BM_Complexity_O_N_log_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity([](int n) {return n * std::log2(n); }); -BENCHMARK(BM_Complexity_O_N_log_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity(); +BENCHMARK(BM_Complexity_O_N_log_N) + ->RangeMultiplier(2) + ->Range(1 << 10, 1 << 16) + ->Complexity(benchmark::oNLogN); +BENCHMARK(BM_Complexity_O_N_log_N) + ->RangeMultiplier(2) + ->Range(1 << 10, 1 << 16) + ->Complexity([](int n) { return n * std::log2(n); }); +BENCHMARK(BM_Complexity_O_N_log_N) + ->RangeMultiplier(2) + ->Range(1 << 10, 1 << 16) + ->Complexity(); -const char* big_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N_BigO"; -const char* rms_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N_RMS"; -const char* enum_auto_big_o_n_lg_n = "NlgN"; -const char* lambda_big_o_n_lg_n = "f\\(N\\)"; +const char *big_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N_BigO"; +const char *rms_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N_RMS"; +const char
*enum_auto_big_o_n_lg_n = "NlgN"; +const char *lambda_big_o_n_lg_n = "f\\(N\\)"; // Add enum tests -ADD_COMPLEXITY_CASES(big_o_n_lg_n_test_name, rms_o_n_lg_n_test_name, enum_auto_big_o_n_lg_n); +ADD_COMPLEXITY_CASES(big_o_n_lg_n_test_name, rms_o_n_lg_n_test_name, + enum_auto_big_o_n_lg_n); // Add lambda tests -ADD_COMPLEXITY_CASES(big_o_n_lg_n_test_name, rms_o_n_lg_n_test_name, lambda_big_o_n_lg_n); - +ADD_COMPLEXITY_CASES(big_o_n_lg_n_test_name, rms_o_n_lg_n_test_name, + lambda_big_o_n_lg_n); // ========================================================================= // // --------------------------- TEST CASES END ------------------------------ // // ========================================================================= // -int main(int argc, char* argv[]) { - RunOutputTests(argc, argv); -} - +int main(int argc, char *argv[]) { RunOutputTests(argc, argv); } diff --git a/utils/google-benchmark/test/cxx03_test.cc b/utils/google-benchmark/test/cxx03_test.cc index 9994c9e1c..4f3d0fb6f 100644 --- a/utils/google-benchmark/test/cxx03_test.cc +++ b/utils/google-benchmark/test/cxx03_test.cc @@ -1,6 +1,6 @@ #undef NDEBUG -#include #include +#include #include "benchmark/benchmark.h" @@ -9,32 +9,32 @@ #endif void BM_empty(benchmark::State& state) { - while (state.KeepRunning()) { - volatile std::size_t x = state.iterations(); - ((void)x); - } + while (state.KeepRunning()) { + volatile std::size_t x = state.iterations(); + ((void)x); + } } BENCHMARK(BM_empty); // The new C++11 interface for args/ranges requires initializer list support. // Therefore we provide the old interface to support C++03. void BM_old_arg_range_interface(benchmark::State& state) { - assert((state.range(0) == 1 && state.range(1) == 2) || - (state.range(0) == 5 && state.range(1) == 6)); - while (state.KeepRunning()) { - } + assert((state.range(0) == 1 && state.range(1) == 2) || + (state.range(0) == 5 && state.range(1) == 6)); + while (state.KeepRunning()) { + } } BENCHMARK(BM_old_arg_range_interface)->ArgPair(1, 2)->RangePair(5, 5, 6, 6); template <class T, class U> void BM_template2(benchmark::State& state) { - BM_empty(state); + BM_empty(state); } BENCHMARK_TEMPLATE2(BM_template2, int, long); template <class T> void BM_template1(benchmark::State& state) { - BM_empty(state); + BM_empty(state); } BENCHMARK_TEMPLATE(BM_template1, long); BENCHMARK_TEMPLATE1(BM_template1, int); diff --git a/utils/google-benchmark/test/diagnostics_test.cc b/utils/google-benchmark/test/diagnostics_test.cc index 60fa3b161..c6c235d0c 100644 --- a/utils/google-benchmark/test/diagnostics_test.cc +++ b/utils/google-benchmark/test/diagnostics_test.cc @@ -7,10 +7,11 @@ // NOTE: Users should NOT include or use src/check.h. This is only done in // order to test library internals.
-#include "benchmark/benchmark_api.h" -#include "../src/check.h" -#include #include +#include + +#include "../src/check.h" +#include "benchmark/benchmark_api.h" #if defined(__GNUC__) && !defined(__EXCEPTIONS) #define TEST_HAS_NO_EXCEPTIONS @@ -29,13 +30,15 @@ void try_invalid_pause_resume(benchmark::State& state) { try { state.PauseTiming(); std::abort(); - } catch (std::logic_error const&) {} + } catch (std::logic_error const&) { + } try { state.ResumeTiming(); std::abort(); - } catch (std::logic_error const&) {} + } catch (std::logic_error const&) { + } #else - (void)state; // avoid unused warning + (void)state; // avoid unused warning #endif } @@ -54,7 +57,7 @@ void BM_diagnostic_test(benchmark::State& state) { } BENCHMARK(BM_diagnostic_test); -int main(int argc, char** argv) { +int main(int argc, char* argv[]) { benchmark::internal::GetAbortHandler() = &TestHandler; benchmark::Initialize(&argc, argv); benchmark::RunSpecifiedBenchmarks(); diff --git a/utils/google-benchmark/test/donotoptimize_test.cc b/utils/google-benchmark/test/donotoptimize_test.cc index e4453fbbc..b21187aad 100644 --- a/utils/google-benchmark/test/donotoptimize_test.cc +++ b/utils/google-benchmark/test/donotoptimize_test.cc @@ -4,15 +4,12 @@ namespace { #if defined(__GNUC__) - std::uint64_t double_up(const std::uint64_t x) __attribute__ ((const)); +std::uint64_t double_up(const std::uint64_t x) __attribute__((const)); #endif - std::uint64_t double_up(const std::uint64_t x) { - return x * 2; - } +std::uint64_t double_up(const std::uint64_t x) { return x * 2; } } int main(int, char*[]) { - // this test verifies compilation of DoNotOptimize() for some types char buffer8[8]; diff --git a/utils/google-benchmark/test/filter_test.cc b/utils/google-benchmark/test/filter_test.cc index 0ba407115..3a205295f 100644 --- a/utils/google-benchmark/test/filter_test.cc +++ b/utils/google-benchmark/test/filter_test.cc @@ -27,9 +27,7 @@ class TestReporter : public benchmark::ConsoleReporter { virtual ~TestReporter() {} - size_t GetCount() const { - return count_; - } + size_t GetCount() const { return count_; } private: mutable size_t count_; @@ -37,46 +35,47 @@ class TestReporter : public benchmark::ConsoleReporter { } // end namespace - static void NoPrefix(benchmark::State& state) { - while (state.KeepRunning()) {} + while (state.KeepRunning()) { + } } BENCHMARK(NoPrefix); static void BM_Foo(benchmark::State& state) { - while (state.KeepRunning()) {} + while (state.KeepRunning()) { + } } BENCHMARK(BM_Foo); - static void BM_Bar(benchmark::State& state) { - while (state.KeepRunning()) {} + while (state.KeepRunning()) { + } } BENCHMARK(BM_Bar); - static void BM_FooBar(benchmark::State& state) { - while (state.KeepRunning()) {} + while (state.KeepRunning()) { + } } BENCHMARK(BM_FooBar); - static void BM_FooBa(benchmark::State& state) { - while (state.KeepRunning()) {} + while (state.KeepRunning()) { + } } BENCHMARK(BM_FooBa); - - -int main(int argc, char** argv) { +int main(int argc, char **argv) { bool list_only = false; - for (int i=0; i < argc; ++i) - list_only |= std::string(argv[i]).find("--benchmark_list_tests") != std::string::npos; + for (int i = 0; i < argc; ++i) + list_only |= std::string(argv[i]).find("--benchmark_list_tests") != + std::string::npos; benchmark::Initialize(&argc, argv); TestReporter test_reporter; - const size_t returned_count = benchmark::RunSpecifiedBenchmarks(&test_reporter); + const size_t returned_count = + benchmark::RunSpecifiedBenchmarks(&test_reporter); if (argc == 2) { // Make sure we ran all of the 
tests diff --git a/utils/google-benchmark/test/fixture_test.cc b/utils/google-benchmark/test/fixture_test.cc index 226bb2a1b..bbc2f9579 100644 --- a/utils/google-benchmark/test/fixture_test.cc +++ b/utils/google-benchmark/test/fixture_test.cc @@ -20,15 +20,12 @@ class MyFixture : public ::benchmark::Fixture { } } - ~MyFixture() { - assert(data == nullptr); - } + ~MyFixture() { assert(data == nullptr); } std::unique_ptr<int> data; }; - -BENCHMARK_F(MyFixture, Foo)(benchmark::State& st) { +BENCHMARK_F(MyFixture, Foo)(benchmark::State &st) { assert(data.get() != nullptr); assert(*data == 42); while (st.KeepRunning()) { diff --git a/utils/google-benchmark/test/map_test.cc b/utils/google-benchmark/test/map_test.cc index 729c012fd..83457c998 100644 --- a/utils/google-benchmark/test/map_test.cc +++ b/utils/google-benchmark/test/map_test.cc @@ -37,9 +37,7 @@ class MapFixture : public ::benchmark::Fixture { m = ConstructRandomMap(st.range(0)); } - void TearDown(const ::benchmark::State&) { - m.clear(); - } + void TearDown(const ::benchmark::State&) { m.clear(); } std::map<int, int> m; }; @@ -53,6 +51,6 @@ BENCHMARK_DEFINE_F(MapFixture, Lookup)(benchmark::State& state) { } state.SetItemsProcessed(state.iterations() * size); } -BENCHMARK_REGISTER_F(MapFixture, Lookup)->Range(1<<3, 1<<12); +BENCHMARK_REGISTER_F(MapFixture, Lookup)->Range(1 << 3, 1 << 12); BENCHMARK_MAIN() diff --git a/utils/google-benchmark/test/multiple_ranges_test.cc b/utils/google-benchmark/test/multiple_ranges_test.cc index ad9f0d8cd..8e67b3b2a 100644 --- a/utils/google-benchmark/test/multiple_ranges_test.cc +++ b/utils/google-benchmark/test/multiple_ranges_test.cc @@ -1,19 +1,30 @@ #include "benchmark/benchmark.h" -#include #include +#include class MultipleRangesFixture : public ::benchmark::Fixture { public: MultipleRangesFixture() - : expectedValues({ - {1, 3, 5}, {1, 3, 8}, {1, 3, 15}, {2, 3, 5}, {2, 3, 8}, {2, 3, 15}, - {1, 4, 5}, {1, 4, 8}, {1, 4, 15}, {2, 4, 5}, {2, 4, 8}, {2, 4, 15}, - {1, 7, 5}, {1, 7, 8}, {1, 7, 15}, {2, 7, 5}, {2, 7, 8}, {2, 7, 15}, - {7, 6, 3} - }) - { - } + : expectedValues({{1, 3, 5}, + {1, 3, 8}, + {1, 3, 15}, + {2, 3, 5}, + {2, 3, 8}, + {2, 3, 15}, + {1, 4, 5}, + {1, 4, 8}, + {1, 4, 15}, + {2, 4, 5}, + {2, 4, 8}, + {2, 4, 15}, + {1, 7, 5}, + {1, 7, 8}, + {1, 7, 15}, + {2, 7, 5}, + {2, 7, 8}, + {2, 7, 15}, + {7, 6, 3}}) {} void SetUp(const ::benchmark::State& state) { std::vector<int> ranges = {state.range(0), state.range(1), state.range(2)}; @@ -26,12 +37,11 @@ class MultipleRangesFixture : public ::benchmark::Fixture { virtual ~MultipleRangesFixture() { assert(actualValues.size() == expectedValues.size()); } - + std::set<std::tuple<int, int, int>> expectedValues; std::set<std::tuple<int, int, int>> actualValues; }; - BENCHMARK_DEFINE_F(MultipleRangesFixture, Empty)(benchmark::State& state) { while (state.KeepRunning()) { int product = state.range(0) * state.range(1) * state.range(2); @@ -41,21 +51,24 @@ BENCHMARK_DEFINE_F(MultipleRangesFixture, Empty)(benchmark::State& state) { } } -BENCHMARK_REGISTER_F(MultipleRangesFixture, Empty)->RangeMultiplier(2) - ->Ranges({{1, 2}, {3, 7}, {5, 15}})->Args({7, 6, 3}); +BENCHMARK_REGISTER_F(MultipleRangesFixture, Empty) + ->RangeMultiplier(2) + ->Ranges({{1, 2}, {3, 7}, {5, 15}}) + ->Args({7, 6, 3}); void BM_CheckDefaultArgument(benchmark::State& state) { // Test that the 'range()' without an argument is the same as 'range(0)'.
assert(state.range() == state.range(0)); assert(state.range() != state.range(1)); - while (state.KeepRunning()) {} + while (state.KeepRunning()) { + } } BENCHMARK(BM_CheckDefaultArgument)->Ranges({{1, 5}, {6, 10}}); static void BM_MultipleRanges(benchmark::State& st) { - while (st.KeepRunning()) {} + while (st.KeepRunning()) { + } } BENCHMARK(BM_MultipleRanges)->Ranges({{5, 5}, {6, 6}}); - BENCHMARK_MAIN() diff --git a/utils/google-benchmark/test/options_test.cc b/utils/google-benchmark/test/options_test.cc index 06bc9c2ee..bedb1cc3e 100644 --- a/utils/google-benchmark/test/options_test.cc +++ b/utils/google-benchmark/test/options_test.cc @@ -12,8 +12,7 @@ void BM_basic_slow(benchmark::State& state) { std::chrono::milliseconds sleep_duration(state.range(0)); while (state.KeepRunning()) { std::this_thread::sleep_for( - std::chrono::duration_cast<std::chrono::nanoseconds>(sleep_duration) - ); + std::chrono::duration_cast<std::chrono::nanoseconds>(sleep_duration)); } } diff --git a/utils/google-benchmark/test/output_test.h b/utils/google-benchmark/test/output_test.h index 98ce54b2e..57d4397ad 100644 --- a/utils/google-benchmark/test/output_test.h +++ b/utils/google-benchmark/test/output_test.h @@ -2,28 +2,28 @@ #define TEST_OUTPUT_TEST_H #undef NDEBUG -#include "benchmark/benchmark.h" -#include "../src/re.h" -#include -#include #include #include +#include #include +#include + +#include "../src/re.h" +#include "benchmark/benchmark.h" #define CONCAT2(x, y) x##y #define CONCAT(x, y) CONCAT2(x, y) -#define ADD_CASES(...) \ - int CONCAT(dummy, __LINE__) = ::AddCases(__VA_ARGS__) +#define ADD_CASES(...) int CONCAT(dummy, __LINE__) = ::AddCases(__VA_ARGS__) #define SET_SUBSTITUTIONS(...) \ - int CONCAT(dummy, __LINE__) = ::SetSubstitutions(__VA_ARGS__) + int CONCAT(dummy, __LINE__) = ::SetSubstitutions(__VA_ARGS__) enum MatchRules { - MR_Default, // Skip non-matching lines until a match is found. - MR_Next, // Match must occur on the next line. - MR_Not // No line between the current position and the next match matches - // the regex + MR_Default, // Skip non-matching lines until a match is found. + MR_Next, // Match must occur on the next line. + MR_Not // No line between the current position and the next match matches + // the regex }; struct TestCase { @@ -43,7 +43,7 @@ enum TestCaseID { TC_CSVOut, TC_CSVErr, - TC_NumID // PRIVATE + TC_NumID // PRIVATE }; // Add a list of test cases to be run against the output specified by @@ -66,7 +66,6 @@ namespace { const char* const dec_re = "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"; -} // end namespace +} // end namespace - -#endif // TEST_OUTPUT_TEST_H \ No newline at end of file +#endif // TEST_OUTPUT_TEST_H diff --git a/utils/google-benchmark/test/output_test_helper.cc b/utils/google-benchmark/test/output_test_helper.cc index 4bd95efe0..721d39f92 100644 --- a/utils/google-benchmark/test/output_test_helper.cc +++ b/utils/google-benchmark/test/output_test_helper.cc @@ -1,16 +1,17 @@ -#include "output_test.h" -#include "../src/check.h" // NOTE: check.h is for internal use only! -#include "../src/re.h" // NOTE: re.h is for internal use only -#include -#include #include +#include +#include #include +#include "../src/check.h" // NOTE: check.h is for internal use only!
+#include "../src/re.h" // NOTE: re.h is for internal use only +#include "output_test.h" // ========================================================================= // // ------------------------------ Internals -------------------------------- // // ========================================================================= // -namespace internal { namespace { +namespace internal { +namespace { using TestCaseList = std::vector<TestCase>; @@ -22,91 +23,97 @@ using TestCaseList = std::vector<TestCase>; using SubMap = std::vector<std::pair<std::string, std::string>>; TestCaseList& GetTestCaseList(TestCaseID ID) { - // Uses function-local statics to ensure initialization occurs - // before first use. - static TestCaseList lists[TC_NumID]; - return lists[ID]; + // Uses function-local statics to ensure initialization occurs + // before first use. + static TestCaseList lists[TC_NumID]; + return lists[ID]; } SubMap& GetSubstitutions() { - // Don't use 'dec_re' from header because it may not yet be initialized. - static std::string dec_re = "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"; - static SubMap map = { - {"%float", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"}, - {"%int", "[ ]*[0-9]+"}, - {" %s ", "[ ]+"}, - {"%time", "[ ]*[0-9]{1,5} ns"}, - {"%console_report", "[ ]*[0-9]{1,5} ns [ ]*[0-9]{1,5} ns [ ]*[0-9]+"}, - {"%csv_report", "[0-9]+," + dec_re + "," + dec_re + ",ns,,,,,"} - }; - return map; + // Don't use 'dec_re' from header because it may not yet be initialized. + static std::string dec_re = "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"; + static SubMap map = { + {"%float", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"}, + {"%int", "[ ]*[0-9]+"}, + {" %s ", "[ ]+"}, + {"%time", "[ ]*[0-9]{1,5} ns"}, + {"%console_report", "[ ]*[0-9]{1,5} ns [ ]*[0-9]{1,5} ns [ ]*[0-9]+"}, + {"%console_us_report", "[ ]*[0-9] us [ ]*[0-9] us [ ]*[0-9]+"}, + {"%csv_report", "[0-9]+," + dec_re + "," + dec_re + ",ns,,,,,"}, + {"%csv_us_report", "[0-9]+," + dec_re + "," + dec_re + ",us,,,,,"}, + {"%csv_bytes_report", + "[0-9]+," + dec_re + "," + dec_re + ",ns," + dec_re + ",,,,"}, + {"%csv_items_report", + "[0-9]+," + dec_re + "," + dec_re + ",ns,," + dec_re + ",,,"}, + {"%csv_label_report_begin", "[0-9]+," + dec_re + "," + dec_re + ",ns,,,"}, + {"%csv_label_report_end", ",,"}}; + return map; } std::string PerformSubstitutions(std::string source) { - SubMap const& subs = GetSubstitutions(); - using SizeT = std::string::size_type; - for (auto const& KV : subs) { - SizeT pos; - SizeT next_start = 0; - while ((pos = source.find(KV.first, next_start)) != std::string::npos) { - next_start = pos + KV.second.size(); - source.replace(pos, KV.first.size(), KV.second); - } + SubMap const& subs = GetSubstitutions(); + using SizeT = std::string::size_type; + for (auto const& KV : subs) { + SizeT pos; + SizeT next_start = 0; + while ((pos = source.find(KV.first, next_start)) != std::string::npos) { + next_start = pos + KV.second.size(); + source.replace(pos, KV.first.size(), KV.second); } - return source; + } + return source; } void CheckCase(std::stringstream& remaining_output, TestCase const& TC, - TestCaseList const& not_checks) -{ - std::string first_line; - bool on_first = true; - std::string line; - while (remaining_output.eof() == false) { - CHECK(remaining_output.good()); - std::getline(remaining_output, line); - if (on_first) { - first_line = line; - on_first = false; - } - for (auto& NC : not_checks) { - CHECK(!NC.regex->Match(line)) - << "Unexpected match for line \"" << line - << "\" for MR_Not regex \"" << NC.regex_str << "\"" - << "\n actual regex string \"" << TC.substituted_regex << "\"" - << "\n
started matching near: " << first_line; - } - if (TC.regex->Match(line)) return; - CHECK(TC.match_rule != MR_Next) - << "Expected line \"" << line << "\" to match regex \"" << TC.regex_str << "\"" - << "\n actual regex string \"" << TC.substituted_regex << "\"" - << "\n started matching near: " << first_line; + TestCaseList const& not_checks) { + std::string first_line; + bool on_first = true; + std::string line; + while (remaining_output.eof() == false) { + CHECK(remaining_output.good()); + std::getline(remaining_output, line); + if (on_first) { + first_line = line; + on_first = false; } - CHECK(remaining_output.eof() == false) - << "End of output reached before match for regex \"" << TC.regex_str - << "\" was found" + for (const auto& NC : not_checks) { + CHECK(!NC.regex->Match(line)) + << "Unexpected match for line \"" << line << "\" for MR_Not regex \"" + << NC.regex_str << "\"" + << "\n actual regex string \"" << TC.substituted_regex << "\"" + << "\n started matching near: " << first_line; + } + if (TC.regex->Match(line)) return; + CHECK(TC.match_rule != MR_Next) + << "Expected line \"" << line << "\" to match regex \"" << TC.regex_str + << "\"" << "\n actual regex string \"" << TC.substituted_regex << "\"" << "\n started matching near: " << first_line; + } + CHECK(remaining_output.eof() == false) + << "End of output reached before match for regex \"" << TC.regex_str + << "\" was found" + << "\n actual regex string \"" << TC.substituted_regex << "\"" + << "\n started matching near: " << first_line; } - void CheckCases(TestCaseList const& checks, std::stringstream& output) { - std::vector<TestCase> not_checks; - for (size_t i=0; i < checks.size(); ++i) { - const auto& TC = checks[i]; - if (TC.match_rule == MR_Not) { - not_checks.push_back(TC); - continue; - } - CheckCase(output, TC, not_checks); - not_checks.clear(); + std::vector<TestCase> not_checks; + for (size_t i = 0; i < checks.size(); ++i) { + const auto& TC = checks[i]; + if (TC.match_rule == MR_Not) { + not_checks.push_back(TC); + continue; } + CheckCase(output, TC, not_checks); + not_checks.clear(); + } } class TestReporter : public benchmark::BenchmarkReporter { -public: + public: TestReporter(std::vector<benchmark::BenchmarkReporter*> reps) - : reporters_(reps) {} + : reporters_(reps) {} virtual bool ReportContext(const Context& context) { bool last_ret = false; @@ -118,56 +125,62 @@ public: first = false; last_ret = new_ret; } + (void)first; return last_ret; } - void ReportRuns(const std::vector<Run>& report) - { for (auto rep : reporters_) rep->ReportRuns(report); } - void Finalize() { for (auto rep : reporters_) rep->Finalize(); } + void ReportRuns(const std::vector<Run>& report) { + for (auto rep : reporters_) rep->ReportRuns(report); + } + void Finalize() { + for (auto rep : reporters_) rep->Finalize(); + } -private: - std::vector<benchmark::BenchmarkReporter*> reporters_; + private: + std::vector<benchmark::BenchmarkReporter*> reporters_; }; - -}} // end namespace internal +} +} // end namespace internal // ========================================================================= // // -------------------------- Public API Definitions------------------------ // // ========================================================================= // TestCase::TestCase(std::string re, int rule) - : regex_str(std::move(re)), match_rule(rule), + : regex_str(std::move(re)), + match_rule(rule), substituted_regex(internal::PerformSubstitutions(regex_str)), - regex(std::make_shared<benchmark::Regex>()) -{ - std::string err_str; - regex->Init(substituted_regex, &err_str); - CHECK(err_str.empty()) - << "Could not construct regex \"" << substituted_regex << "\"" - << "\n
originally \"" << regex_str << "\"" - << "\n got error: " << err_str; + regex(std::make_shared<benchmark::Regex>()) { + std::string err_str; + regex->Init(substituted_regex, &err_str); + CHECK(err_str.empty()) << "Could not construct regex \"" << substituted_regex + << "\"" + << "\n originally \"" << regex_str << "\"" + << "\n got error: " << err_str; } int AddCases(TestCaseID ID, std::initializer_list<TestCase> il) { - auto& L = internal::GetTestCaseList(ID); - L.insert(L.end(), il); - return 0; + auto& L = internal::GetTestCaseList(ID); + L.insert(L.end(), il); + return 0; } -int SetSubstitutions(std::initializer_list<std::pair<std::string, std::string>> il) { - auto& subs = internal::GetSubstitutions(); - for (auto const& KV : il) { - bool exists = false; - for (auto& EKV : subs) { - if (EKV.first == KV.first) { - EKV.second = KV.second; - exists = true; - break; - } - } - if (!exists) subs.push_back(KV); +int SetSubstitutions( + std::initializer_list<std::pair<std::string, std::string>> il) { + auto& subs = internal::GetSubstitutions(); + for (auto KV : il) { + bool exists = false; + KV.second = internal::PerformSubstitutions(KV.second); + for (auto& EKV : subs) { + if (EKV.first == KV.first) { + EKV.second = std::move(KV.second); + exists = true; + break; + } } - return 0; + if (!exists) subs.push_back(std::move(KV)); + } + return 0; } void RunOutputTests(int argc, char* argv[]) { @@ -184,21 +197,20 @@ void RunOutputTests(int argc, char* argv[]) { std::stringstream out_stream; std::stringstream err_stream; - ReporterTest(const char* n, - std::vector<TestCase>& out_tc, + ReporterTest(const char* n, std::vector<TestCase>& out_tc, std::vector<TestCase>& err_tc, benchmark::BenchmarkReporter& br) : name(n), output_cases(out_tc), error_cases(err_tc), reporter(br) { - reporter.SetOutputStream(&out_stream); - reporter.SetErrorStream(&err_stream); + reporter.SetOutputStream(&out_stream); + reporter.SetErrorStream(&err_stream); } } TestCases[] = { {"ConsoleReporter", GetTestCaseList(TC_ConsoleOut), - GetTestCaseList(TC_ConsoleErr), CR}, - {"JSONReporter", GetTestCaseList(TC_JSONOut), - GetTestCaseList(TC_JSONErr), JR}, - {"CSVReporter", GetTestCaseList(TC_CSVOut), - GetTestCaseList(TC_CSVErr), CSVR}, + GetTestCaseList(TC_ConsoleErr), CR}, + {"JSONReporter", GetTestCaseList(TC_JSONOut), GetTestCaseList(TC_JSONErr), + JR}, + {"CSVReporter", GetTestCaseList(TC_CSVOut), GetTestCaseList(TC_CSVErr), + CSVR}, }; // Create the test reporter and run the benchmarks.
@@ -207,18 +219,16 @@ void RunOutputTests(int argc, char* argv[]) { benchmark::RunSpecifiedBenchmarks(&test_rep); for (auto& rep_test : TestCases) { - std::string msg = std::string("\nTesting ") + rep_test.name + " Output\n"; - std::string banner(msg.size() - 1, '-'); - std::cout << banner << msg << banner << "\n"; + std::string msg = std::string("\nTesting ") + rep_test.name + " Output\n"; + std::string banner(msg.size() - 1, '-'); + std::cout << banner << msg << banner << "\n"; - std::cerr << rep_test.err_stream.str(); - std::cout << rep_test.out_stream.str(); + std::cerr << rep_test.err_stream.str(); + std::cout << rep_test.out_stream.str(); - internal::CheckCases(rep_test.error_cases,rep_test.err_stream); - internal::CheckCases(rep_test.output_cases, rep_test.out_stream); + internal::CheckCases(rep_test.error_cases, rep_test.err_stream); + internal::CheckCases(rep_test.output_cases, rep_test.out_stream); - std::cout << "\n"; + std::cout << "\n"; } } - - diff --git a/utils/google-benchmark/test/register_benchmark_test.cc b/utils/google-benchmark/test/register_benchmark_test.cc index f25ae85ae..e9f8ea530 100644 --- a/utils/google-benchmark/test/register_benchmark_test.cc +++ b/utils/google-benchmark/test/register_benchmark_test.cc @@ -1,14 +1,15 @@ #undef NDEBUG -#include "benchmark/benchmark.h" -#include "../src/check.h" // NOTE: check.h is for internal use only! #include #include +#include "../src/check.h" // NOTE: check.h is for internal use only! +#include "benchmark/benchmark.h" + namespace { class TestReporter : public benchmark::ConsoleReporter { -public: + public: virtual void ReportRuns(const std::vector<Run>& report) { all_runs_.insert(all_runs_.end(), begin(report), end(report)); ConsoleReporter::ReportRuns(report); @@ -20,18 +21,19 @@ public: struct TestCase { std::string name; const char* label; - TestCase(const char* xname) : name(xname), label(nullptr) {} + // Note: not explicit as we rely on it being converted through ADD_CASES. + TestCase(const char* xname) : TestCase(xname, nullptr) {} TestCase(const char* xname, const char* xlabel) - : name(xname), label(xlabel) {} + : name(xname), label(xlabel) {} typedef benchmark::BenchmarkReporter::Run Run; void CheckRun(Run const& run) const { - CHECK(name == run.benchmark_name) << "expected " << name - << " got " << run.benchmark_name; + CHECK(name == run.benchmark_name) << "expected " << name << " got " + << run.benchmark_name; if (label) { - CHECK(run.report_label == label) << "expected " << label - << " got " << run.report_label; + CHECK(run.report_label == label) << "expected " << label << " got " + << run.report_label; } else { CHECK(run.report_label == ""); } @@ -49,8 +51,7 @@ int AddCases(std::initializer_list<TestCase> const& v) { #define CONCAT(x, y) CONCAT2(x, y) #define CONCAT2(x, y) x##y -#define ADD_CASES(...) \ -int CONCAT(dummy, __LINE__) = AddCases({__VA_ARGS__}) +#define ADD_CASES(...)
int CONCAT(dummy, __LINE__) = AddCases({__VA_ARGS__}) } // end namespace @@ -59,11 +60,13 @@ typedef benchmark::internal::Benchmark* ReturnVal; //----------------------------------------------------------------------------// // Test RegisterBenchmark with no additional arguments //----------------------------------------------------------------------------// -void BM_function(benchmark::State& state) { while (state.KeepRunning()) {} } +void BM_function(benchmark::State& state) { + while (state.KeepRunning()) { + } +} BENCHMARK(BM_function); ReturnVal dummy = benchmark::RegisterBenchmark( - "BM_function_manual_registration", - BM_function); + "BM_function_manual_registration", BM_function); ADD_CASES({"BM_function"}, {"BM_function_manual_registration"}); //----------------------------------------------------------------------------// @@ -74,27 +77,21 @@ ADD_CASES({"BM_function"}, {"BM_function_manual_registration"}); #ifndef BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK void BM_extra_args(benchmark::State& st, const char* label) { - while (st.KeepRunning()) {} + while (st.KeepRunning()) { + } st.SetLabel(label); } int RegisterFromFunction() { std::pair<const char*, const char*> cases[] = { - {"test1", "One"}, - {"test2", "Two"}, - {"test3", "Three"} - }; - for (auto& c : cases) + {"test1", "One"}, {"test2", "Two"}, {"test3", "Three"}}; + for (auto const& c : cases) benchmark::RegisterBenchmark(c.first, &BM_extra_args, c.second); return 0; } int dummy2 = RegisterFromFunction(); -ADD_CASES( - {"test1", "One"}, - {"test2", "Two"}, - {"test3", "Three"} -); +ADD_CASES({"test1", "One"}, {"test2", "Two"}, {"test3", "Three"}); -#endif // BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK +#endif // BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK //----------------------------------------------------------------------------// // Test RegisterBenchmark with different callable types @@ -102,7 +99,8 @@ ADD_CASES( struct CustomFixture { void operator()(benchmark::State& st) { - while (st.KeepRunning()) {} + while (st.KeepRunning()) { + } } }; @@ -118,7 +116,8 @@ void TestRegistrationAtRuntime() { { int x = 42; auto capturing_lam = [=](benchmark::State& st) { - while (st.KeepRunning()) {} + while (st.KeepRunning()) { + } st.SetLabel(std::to_string(x)); }; benchmark::RegisterBenchmark("lambda_benchmark", capturing_lam); diff --git a/utils/google-benchmark/test/reporter_output_test.cc b/utils/google-benchmark/test/reporter_output_test.cc index 00f02f264..2e6d2b2a0 100644 --- a/utils/google-benchmark/test/reporter_output_test.cc +++ b/utils/google-benchmark/test/reporter_output_test.cc @@ -1,69 +1,162 @@ #undef NDEBUG -#include "benchmark/benchmark.h" -#include "output_test.h" #include +#include "benchmark/benchmark.h" +#include "output_test.h" // ========================================================================= // // ---------------------- Testing Prologue Output -------------------------- // // ========================================================================= // -ADD_CASES(TC_ConsoleOut, { - {"^Benchmark %s Time %s CPU %s Iterations$", MR_Next}, - {"^[-]+$", MR_Next} -}); -ADD_CASES(TC_CSVOut, { - {"name,iterations,real_time,cpu_time,time_unit,bytes_per_second,items_per_second," "label,error_occurred,error_message"} -}); +ADD_CASES(TC_ConsoleOut, {{"^Benchmark %s Time %s CPU %s Iterations$", MR_Next}, {"^[-]+$", MR_Next}}); +ADD_CASES(TC_CSVOut, + {{"name,iterations,real_time,cpu_time,time_unit,bytes_per_second," "items_per_second,label,error_occurred,error_message"}}); //
========================================================================= // // ------------------------ Testing Basic Output --------------------------- // // ========================================================================= // void BM_basic(benchmark::State& state) { - while (state.KeepRunning()) {} + while (state.KeepRunning()) { + } } BENCHMARK(BM_basic); -ADD_CASES(TC_ConsoleOut, { - {"^BM_basic %console_report$"} -}); -ADD_CASES(TC_JSONOut, { - {"\"name\": \"BM_basic\",$"}, - {"\"iterations\": %int,$", MR_Next}, - {"\"real_time\": %int,$", MR_Next}, - {"\"cpu_time\": %int,$", MR_Next}, - {"\"time_unit\": \"ns\"$", MR_Next}, - {"}", MR_Next} -}); -ADD_CASES(TC_CSVOut, { - {"^\"BM_basic\",%csv_report$"} -}); +ADD_CASES(TC_ConsoleOut, {{"^BM_basic %console_report$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_basic\",$"}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %int,$", MR_Next}, + {"\"cpu_time\": %int,$", MR_Next}, + {"\"time_unit\": \"ns\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_basic\",%csv_report$"}}); + +// ========================================================================= // +// ------------------------ Testing Bytes per Second Output ---------------- // +// ========================================================================= // + +void BM_bytes_per_second(benchmark::State& state) { + while (state.KeepRunning()) { + } + state.SetBytesProcessed(1); +} +BENCHMARK(BM_bytes_per_second); + +ADD_CASES(TC_ConsoleOut, + {{"^BM_bytes_per_second %console_report +%floatB/s$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_bytes_per_second\",$"}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %int,$", MR_Next}, + {"\"cpu_time\": %int,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"bytes_per_second\": %int$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_bytes_per_second\",%csv_bytes_report$"}}); + +// ========================================================================= // +// ------------------------ Testing Items per Second Output ---------------- // +// ========================================================================= // + +void BM_items_per_second(benchmark::State& state) { + while (state.KeepRunning()) { + } + state.SetItemsProcessed(1); +} +BENCHMARK(BM_items_per_second); + +ADD_CASES(TC_ConsoleOut, + {{"^BM_items_per_second %console_report +%float items/s$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_items_per_second\",$"}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %int,$", MR_Next}, + {"\"cpu_time\": %int,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"items_per_second\": %int$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_items_per_second\",%csv_items_report$"}}); + +// ========================================================================= // +// ------------------------ Testing Label Output --------------------------- // +// ========================================================================= // + +void BM_label(benchmark::State& state) { + while (state.KeepRunning()) { + } + state.SetLabel("some label"); +} +BENCHMARK(BM_label); + +ADD_CASES(TC_ConsoleOut, {{"^BM_label %console_report some label$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_label\",$"}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %int,$", MR_Next}, + {"\"cpu_time\": %int,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"label\": \"some label\"$", MR_Next}, + {"}", MR_Next}}); +ADD_CASES(TC_CSVOut, 
{{"^\"BM_label\",%csv_label_report_begin\"some " + "label\"%csv_label_report_end$"}}); // ========================================================================= // // ------------------------ Testing Error Output --------------------------- // // ========================================================================= // void BM_error(benchmark::State& state) { - state.SkipWithError("message"); - while(state.KeepRunning()) {} + state.SkipWithError("message"); + while (state.KeepRunning()) { + } } BENCHMARK(BM_error); -ADD_CASES(TC_ConsoleOut, { - {"^BM_error[ ]+ERROR OCCURRED: 'message'$"} -}); -ADD_CASES(TC_JSONOut, { - {"\"name\": \"BM_error\",$"}, - {"\"error_occurred\": true,$", MR_Next}, - {"\"error_message\": \"message\",$", MR_Next} -}); +ADD_CASES(TC_ConsoleOut, {{"^BM_error[ ]+ERROR OCCURRED: 'message'$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_error\",$"}, + {"\"error_occurred\": true,$", MR_Next}, + {"\"error_message\": \"message\",$", MR_Next}}); -ADD_CASES(TC_CSVOut, { - {"^\"BM_error\",,,,,,,,true,\"message\"$"} -}); +ADD_CASES(TC_CSVOut, {{"^\"BM_error\",,,,,,,,true,\"message\"$"}}); +// ========================================================================= // +// ------------------------ Testing No Arg Name Output ----------------------- +// // +// ========================================================================= // + +void BM_no_arg_name(benchmark::State& state) { + while (state.KeepRunning()) { + } +} +BENCHMARK(BM_no_arg_name)->Arg(3); +ADD_CASES(TC_ConsoleOut, {{"^BM_no_arg_name/3 %console_report$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_no_arg_name/3\",$"}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_no_arg_name/3\",%csv_report$"}}); + +// ========================================================================= // +// ------------------------ Testing Arg Name Output ----------------------- // +// ========================================================================= // + +void BM_arg_name(benchmark::State& state) { + while (state.KeepRunning()) { + } +} +BENCHMARK(BM_arg_name)->ArgName("first")->Arg(3); +ADD_CASES(TC_ConsoleOut, {{"^BM_arg_name/first:3 %console_report$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_arg_name/first:3\",$"}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_arg_name/first:3\",%csv_report$"}}); + +// ========================================================================= // +// ------------------------ Testing Arg Names Output ----------------------- // +// ========================================================================= // + +void BM_arg_names(benchmark::State& state) { + while (state.KeepRunning()) { + } +} +BENCHMARK(BM_arg_names)->Args({2, 5, 4})->ArgNames({"first", "", "third"}); +ADD_CASES(TC_ConsoleOut, + {{"^BM_arg_names/first:2/5/third:4 %console_report$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_arg_names/first:2/5/third:4\",$"}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_arg_names/first:2/5/third:4\",%csv_report$"}}); // ========================================================================= // // ----------------------- Testing Complexity Output ----------------------- // @@ -74,85 +167,90 @@ void BM_Complexity_O1(benchmark::State& state) { } state.SetComplexityN(state.range(0)); } -BENCHMARK(BM_Complexity_O1)->Range(1, 1<<18)->Complexity(benchmark::o1); -SET_SUBSTITUTIONS({ - {"%bigOStr", "[ ]*[0-9]+\\.[0-9]+ \\([0-9]+\\)"}, - {"%RMS", "[ ]*[0-9]+ %"} -}); -ADD_CASES(TC_ConsoleOut, { - {"^BM_Complexity_O1_BigO %bigOStr %bigOStr[ ]*$"}, - {"^BM_Complexity_O1_RMS %RMS %RMS[ ]*$"} -}); - 
+BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->Complexity(benchmark::o1); +SET_SUBSTITUTIONS({{"%bigOStr", "[ ]* %float \\([0-9]+\\)"}, + {"%RMS", "[ ]*[0-9]+ %"}}); +ADD_CASES(TC_ConsoleOut, {{"^BM_Complexity_O1_BigO %bigOStr %bigOStr[ ]*$"}, + {"^BM_Complexity_O1_RMS %RMS %RMS[ ]*$"}}); // ========================================================================= // // ----------------------- Testing Aggregate Output ------------------------ // // ========================================================================= // // Test that non-aggregate data is printed by default -void BM_Repeat(benchmark::State& state) { while (state.KeepRunning()) {} } +void BM_Repeat(benchmark::State& state) { + while (state.KeepRunning()) { + } +} BENCHMARK(BM_Repeat)->Repetitions(3); -ADD_CASES(TC_ConsoleOut, { - {"^BM_Repeat/repeats:3 %console_report$"}, - {"^BM_Repeat/repeats:3 %console_report$"}, - {"^BM_Repeat/repeats:3 %console_report$"}, - {"^BM_Repeat/repeats:3_mean %console_report$"}, - {"^BM_Repeat/repeats:3_stddev %console_report$"} -}); -ADD_CASES(TC_JSONOut, { - {"\"name\": \"BM_Repeat/repeats:3\",$"}, - {"\"name\": \"BM_Repeat/repeats:3\",$"}, - {"\"name\": \"BM_Repeat/repeats:3\",$"}, - {"\"name\": \"BM_Repeat/repeats:3_mean\",$"}, - {"\"name\": \"BM_Repeat/repeats:3_stddev\",$"} -}); -ADD_CASES(TC_CSVOut, { - {"^\"BM_Repeat/repeats:3\",%csv_report$"}, - {"^\"BM_Repeat/repeats:3\",%csv_report$"}, - {"^\"BM_Repeat/repeats:3\",%csv_report$"}, - {"^\"BM_Repeat/repeats:3_mean\",%csv_report$"}, - {"^\"BM_Repeat/repeats:3_stddev\",%csv_report$"} -}); +ADD_CASES(TC_ConsoleOut, {{"^BM_Repeat/repeats:3 %console_report$"}, + {"^BM_Repeat/repeats:3 %console_report$"}, + {"^BM_Repeat/repeats:3 %console_report$"}, + {"^BM_Repeat/repeats:3_mean %console_report$"}, + {"^BM_Repeat/repeats:3_stddev %console_report$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:3\",$"}, + {"\"name\": \"BM_Repeat/repeats:3\",$"}, + {"\"name\": \"BM_Repeat/repeats:3\",$"}, + {"\"name\": \"BM_Repeat/repeats:3_mean\",$"}, + {"\"name\": \"BM_Repeat/repeats:3_stddev\",$"}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:3\",%csv_report$"}, + {"^\"BM_Repeat/repeats:3\",%csv_report$"}, + {"^\"BM_Repeat/repeats:3\",%csv_report$"}, + {"^\"BM_Repeat/repeats:3_mean\",%csv_report$"}, + {"^\"BM_Repeat/repeats:3_stddev\",%csv_report$"}}); // Test that a non-repeated test still prints non-aggregate results even when // only-aggregate reports have been requested -void BM_RepeatOnce(benchmark::State& state) { while (state.KeepRunning()) {} } +void BM_RepeatOnce(benchmark::State& state) { + while (state.KeepRunning()) { + } +} BENCHMARK(BM_RepeatOnce)->Repetitions(1)->ReportAggregatesOnly(); -ADD_CASES(TC_ConsoleOut, { - {"^BM_RepeatOnce/repeats:1 %console_report$"} -}); -ADD_CASES(TC_JSONOut, { - {"\"name\": \"BM_RepeatOnce/repeats:1\",$"} -}); -ADD_CASES(TC_CSVOut, { - {"^\"BM_RepeatOnce/repeats:1\",%csv_report$"} -}); - +ADD_CASES(TC_ConsoleOut, {{"^BM_RepeatOnce/repeats:1 %console_report$"}}); +ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_RepeatOnce/repeats:1\",$"}}); +ADD_CASES(TC_CSVOut, {{"^\"BM_RepeatOnce/repeats:1\",%csv_report$"}}); // Test that non-aggregate data is not reported -void BM_SummaryRepeat(benchmark::State& state) { while (state.KeepRunning()) {} } +void BM_SummaryRepeat(benchmark::State& state) { + while (state.KeepRunning()) { + } +} BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->ReportAggregatesOnly(); -ADD_CASES(TC_ConsoleOut, { - {".*BM_SummaryRepeat/repeats:3 ", MR_Not}, - 
{"^BM_SummaryRepeat/repeats:3_mean %console_report$"}, - {"^BM_SummaryRepeat/repeats:3_stddev %console_report$"} -}); -ADD_CASES(TC_JSONOut, { - {".*BM_SummaryRepeat/repeats:3 ", MR_Not}, - {"\"name\": \"BM_SummaryRepeat/repeats:3_mean\",$"}, - {"\"name\": \"BM_SummaryRepeat/repeats:3_stddev\",$"} -}); -ADD_CASES(TC_CSVOut, { - {".*BM_SummaryRepeat/repeats:3 ", MR_Not}, - {"^\"BM_SummaryRepeat/repeats:3_mean\",%csv_report$"}, - {"^\"BM_SummaryRepeat/repeats:3_stddev\",%csv_report$"} -}); +ADD_CASES(TC_ConsoleOut, + {{".*BM_SummaryRepeat/repeats:3 ", MR_Not}, + {"^BM_SummaryRepeat/repeats:3_mean %console_report$"}, + {"^BM_SummaryRepeat/repeats:3_stddev %console_report$"}}); +ADD_CASES(TC_JSONOut, {{".*BM_SummaryRepeat/repeats:3 ", MR_Not}, + {"\"name\": \"BM_SummaryRepeat/repeats:3_mean\",$"}, + {"\"name\": \"BM_SummaryRepeat/repeats:3_stddev\",$"}}); +ADD_CASES(TC_CSVOut, {{".*BM_SummaryRepeat/repeats:3 ", MR_Not}, + {"^\"BM_SummaryRepeat/repeats:3_mean\",%csv_report$"}, + {"^\"BM_SummaryRepeat/repeats:3_stddev\",%csv_report$"}}); + +void BM_RepeatTimeUnit(benchmark::State& state) { + while (state.KeepRunning()) { + } +} +BENCHMARK(BM_RepeatTimeUnit) + ->Repetitions(3) + ->ReportAggregatesOnly() + ->Unit(benchmark::kMicrosecond); +ADD_CASES(TC_ConsoleOut, + {{".*BM_RepeatTimeUnit/repeats:3 ", MR_Not}, + {"^BM_RepeatTimeUnit/repeats:3_mean %console_us_report$"}, + {"^BM_RepeatTimeUnit/repeats:3_stddev %console_us_report$"}}); +ADD_CASES(TC_JSONOut, {{".*BM_RepeatTimeUnit/repeats:3 ", MR_Not}, + {"\"name\": \"BM_RepeatTimeUnit/repeats:3_mean\",$"}, + {"\"time_unit\": \"us\",?$"}, + {"\"name\": \"BM_RepeatTimeUnit/repeats:3_stddev\",$"}, + {"\"time_unit\": \"us\",?$"}}); +ADD_CASES(TC_CSVOut, + {{".*BM_RepeatTimeUnit/repeats:3 ", MR_Not}, + {"^\"BM_RepeatTimeUnit/repeats:3_mean\",%csv_us_report$"}, + {"^\"BM_RepeatTimeUnit/repeats:3_stddev\",%csv_us_report$"}}); // ========================================================================= // // --------------------------- TEST CASES END ------------------------------ // // ========================================================================= // - -int main(int argc, char* argv[]) { - RunOutputTests(argc, argv); -} +int main(int argc, char* argv[]) { RunOutputTests(argc, argv); } diff --git a/utils/google-benchmark/test/skip_with_error_test.cc b/utils/google-benchmark/test/skip_with_error_test.cc index 1ad136656..b74d33c58 100644 --- a/utils/google-benchmark/test/skip_with_error_test.cc +++ b/utils/google-benchmark/test/skip_with_error_test.cc @@ -1,10 +1,11 @@ #undef NDEBUG -#include "benchmark/benchmark.h" -#include "../src/check.h" // NOTE: check.h is for internal use only! #include #include +#include "../src/check.h" // NOTE: check.h is for internal use only! 
+#include "benchmark/benchmark.h" + namespace { class TestReporter : public benchmark::ConsoleReporter { @@ -18,7 +19,7 @@ class TestReporter : public benchmark::ConsoleReporter { ConsoleReporter::ReportRuns(report); } - TestReporter() {} + TestReporter() {} virtual ~TestReporter() {} mutable std::vector all_runs_; @@ -32,11 +33,12 @@ struct TestCase { typedef benchmark::BenchmarkReporter::Run Run; void CheckRun(Run const& run) const { - CHECK(name == run.benchmark_name) << "expected " << name << " got " << run.benchmark_name; + CHECK(name == run.benchmark_name) << "expected " << name << " got " + << run.benchmark_name; CHECK(error_occurred == run.error_occurred); CHECK(error_message == run.error_message); if (error_occurred) { - //CHECK(run.iterations == 0); + // CHECK(run.iterations == 0); } else { CHECK(run.iterations != 0); } @@ -55,12 +57,10 @@ int AddCases(const char* base_name, std::initializer_list const& v) { #define CONCAT(x, y) CONCAT2(x, y) #define CONCAT2(x, y) x##y -#define ADD_CASES(...) \ -int CONCAT(dummy, __LINE__) = AddCases(__VA_ARGS__) +#define ADD_CASES(...) int CONCAT(dummy, __LINE__) = AddCases(__VA_ARGS__) } // end namespace - void BM_error_before_running(benchmark::State& state) { state.SkipWithError("error message"); while (state.KeepRunning()) { @@ -68,8 +68,7 @@ void BM_error_before_running(benchmark::State& state) { } } BENCHMARK(BM_error_before_running); -ADD_CASES("BM_error_before_running", - {{"", true, "error message"}}); +ADD_CASES("BM_error_before_running", {{"", true, "error message"}}); void BM_error_during_running(benchmark::State& state) { int first_iter = true; @@ -85,17 +84,14 @@ void BM_error_during_running(benchmark::State& state) { } } BENCHMARK(BM_error_during_running)->Arg(1)->Arg(2)->ThreadRange(1, 8); -ADD_CASES( - "BM_error_during_running", - {{"/1/threads:1", true, "error message"}, - {"/1/threads:2", true, "error message"}, - {"/1/threads:4", true, "error message"}, - {"/1/threads:8", true, "error message"}, - {"/2/threads:1", false, ""}, - {"/2/threads:2", false, ""}, - {"/2/threads:4", false, ""}, - {"/2/threads:8", false, ""}} -); +ADD_CASES("BM_error_during_running", {{"/1/threads:1", true, "error message"}, + {"/1/threads:2", true, "error message"}, + {"/1/threads:4", true, "error message"}, + {"/1/threads:8", true, "error message"}, + {"/2/threads:1", false, ""}, + {"/2/threads:2", false, ""}, + {"/2/threads:4", false, ""}, + {"/2/threads:8", false, ""}}); void BM_error_after_running(benchmark::State& state) { while (state.KeepRunning()) { @@ -105,13 +101,10 @@ void BM_error_after_running(benchmark::State& state) { state.SkipWithError("error message"); } BENCHMARK(BM_error_after_running)->ThreadRange(1, 8); -ADD_CASES( - "BM_error_after_running", - {{"/threads:1", true, "error message"}, - {"/threads:2", true, "error message"}, - {"/threads:4", true, "error message"}, - {"/threads:8", true, "error message"}} -); +ADD_CASES("BM_error_after_running", {{"/threads:1", true, "error message"}, + {"/threads:2", true, "error message"}, + {"/threads:4", true, "error message"}, + {"/threads:8", true, "error message"}}); void BM_error_while_paused(benchmark::State& state) { bool first_iter = true; @@ -128,18 +121,14 @@ void BM_error_while_paused(benchmark::State& state) { } } BENCHMARK(BM_error_while_paused)->Arg(1)->Arg(2)->ThreadRange(1, 8); -ADD_CASES( - "BM_error_while_paused", - {{"/1/threads:1", true, "error message"}, - {"/1/threads:2", true, "error message"}, - {"/1/threads:4", true, "error message"}, - {"/1/threads:8", true, "error 
message"}, - {"/2/threads:1", false, ""}, - {"/2/threads:2", false, ""}, - {"/2/threads:4", false, ""}, - {"/2/threads:8", false, ""}} -); - +ADD_CASES("BM_error_while_paused", {{"/1/threads:1", true, "error message"}, + {"/1/threads:2", true, "error message"}, + {"/1/threads:4", true, "error message"}, + {"/1/threads:8", true, "error message"}, + {"/2/threads:1", false, ""}, + {"/2/threads:2", false, ""}, + {"/2/threads:4", false, ""}, + {"/2/threads:8", false, ""}}); int main(int argc, char* argv[]) { benchmark::Initialize(&argc, argv); diff --git a/utils/google-benchmark/tools/gbench/report.py b/utils/google-benchmark/tools/gbench/report.py index 44fa4a563..ac69b9bef 100644 --- a/utils/google-benchmark/tools/gbench/report.py +++ b/utils/google-benchmark/tools/gbench/report.py @@ -59,6 +59,10 @@ def calculate_change(old_val, new_val): """ Return a float representing the decimal change between old_val and new_val. """ + if old_val == 0 and new_val == 0: + return 0.0 + if old_val == 0: + return float(new_val - old_val) / (float(old_val + new_val) / 2) return float(new_val - old_val) / abs(old_val) @@ -73,7 +77,7 @@ def generate_difference_report(json1, json2, use_color=True): if b['name'] == name: return b return None - first_line = "{:<{}s} Time CPU".format( + first_line = "{:<{}s} Time CPU Old New".format( 'Benchmark', first_col_width) output_strs = [first_line, '-' * len(first_line)] for bn in json1['benchmarks']: @@ -88,12 +92,13 @@ def generate_difference_report(json1, json2, use_color=True): return BC_WHITE else: return BC_CYAN - fmt_str = "{}{:<{}s}{endc} {}{:+.2f}{endc} {}{:+.2f}{endc}" + fmt_str = "{}{:<{}s}{endc} {}{:+.2f}{endc} {}{:+.2f}{endc} {:4d} {:4d}" tres = calculate_change(bn['real_time'], other_bench['real_time']) cpures = calculate_change(bn['cpu_time'], other_bench['cpu_time']) output_strs += [color_format(use_color, fmt_str, BC_HEADER, bn['name'], first_col_width, get_color(tres), tres, get_color(cpures), cpures, + bn['cpu_time'], other_bench['cpu_time'], endc=BC_ENDC)] return output_strs