Files
android_external_libcxx/utils/google-benchmark/src/sysinfo.cc
Eric Fiselier d9b9ef75a8 [libcxx] Add support for benchmark tests using Google Benchmark.
Summary:
This patch does the following:

1. Checks in a copy of the Google Benchmark library into the libc++ repo under `utils/google-benchmark`.
2. Teaches libc++ how to build Google Benchmark against both (A) in-tree libc++ and (B) the platforms native STL.
3. Allows performance benchmarks to be built as part of the libc++ build.

Building the benchmarks (and Google Benchmark) is off by default. It must be enabled using the CMake option `-DLIBCXX_INCLUDE_BENCHMARKS=ON`. When this option is enabled the tests under `libcxx/benchmarks`  can be built using the `libcxx-benchmarks` target.

On Linux platforms where libstdc++ is the default STL the CMake option `-DLIBCXX_BUILD_BENCHMARKS_NATIVE_STDLIB=ON` can be used to build each benchmark test against libstdc++ as well. This is useful for comparing performance between standard libraries.

Support for benchmarks is currently very minimal. They must be manually run by the user and there is no mechanism for detecting performance regressions.

Known Issues:

* `-DLIBCXX_INCLUDE_BENCHMARKS=ON` is only supported for Clang, and not GCC, since the `-stdlib=libc++` option is needed to build Google Benchmark.








Reviewers: danalbert, dberlin, chandlerc, mclow.lists, jroelofs

Subscribers: chandlerc, dberlin, tberghammer, danalbert, srhines, hfinkel

Differential Revision: https://reviews.llvm.org/D22240

git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@276049 91177308-0d34-0410-b5e6-96231b3b80d8
2016-07-19 23:07:03 +00:00

421 lines
14 KiB
C++

// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "sysinfo.h"
#include "internal_macros.h"
#ifdef BENCHMARK_OS_WINDOWS
#include <Shlwapi.h>
#include <Windows.h>
#include <VersionHelpers.h>
#else
#include <fcntl.h>
#include <sys/resource.h>
#include <sys/types.h> // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD
#include <sys/time.h>
#include <unistd.h>
#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX
#include <sys/sysctl.h>
#endif
#endif
#include <cerrno>
#include <cstdio>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <limits>
#include <mutex>
#include "arraysize.h"
#include "check.h"
#include "cycleclock.h"
#include "internal_macros.h"
#include "log.h"
#include "sleep.h"
#include "string_util.h"
namespace benchmark {
namespace {
std::once_flag cpuinfo_init;
double cpuinfo_cycles_per_second = 1.0;
int cpuinfo_num_cpus = 1; // Conservative guess
std::mutex cputimens_mutex;
#if !defined BENCHMARK_OS_MACOSX
const int64_t estimate_time_ms = 1000;
// Helper function estimates cycles/sec by observing cycles elapsed during
// sleep(). Using small sleep time decreases accuracy significantly.
int64_t EstimateCyclesPerSecond() {
const int64_t start_ticks = cycleclock::Now();
SleepForMilliseconds(estimate_time_ms);
return cycleclock::Now() - start_ticks;
}
#endif
#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
// Helper function for reading an int from a file. Returns true if successful
// and the memory location pointed to by value is set to the value read.
bool ReadIntFromFile(const char* file, long* value) {
bool ret = false;
int fd = open(file, O_RDONLY);
if (fd != -1) {
char line[1024];
char* err;
memset(line, '\0', sizeof(line));
CHECK(read(fd, line, sizeof(line) - 1));
const long temp_value = strtol(line, &err, 10);
if (line[0] != '\0' && (*err == '\n' || *err == '\0')) {
*value = temp_value;
ret = true;
}
close(fd);
}
return ret;
}
#endif
void InitializeSystemInfo() {
#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
char line[1024];
char* err;
long freq;
bool saw_mhz = false;
// If the kernel is exporting the tsc frequency use that. There are issues
// where cpuinfo_max_freq cannot be relied on because the BIOS may be
// exporintg an invalid p-state (on x86) or p-states may be used to put the
// processor in a new mode (turbo mode). Essentially, those frequencies
// cannot always be relied upon. The same reasons apply to /proc/cpuinfo as
// well.
if (!saw_mhz &&
ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
// The value is in kHz (as the file name suggests). For example, on a
// 2GHz warpstation, the file contains the value "2000000".
cpuinfo_cycles_per_second = freq * 1000.0;
saw_mhz = true;
}
// If CPU scaling is in effect, we want to use the *maximum* frequency,
// not whatever CPU speed some random processor happens to be using now.
if (!saw_mhz &&
ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
&freq)) {
// The value is in kHz. For example, on a 2GHz warpstation, the file
// contains the value "2000000".
cpuinfo_cycles_per_second = freq * 1000.0;
saw_mhz = true;
}
// Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq.
const char* pname = "/proc/cpuinfo";
int fd = open(pname, O_RDONLY);
if (fd == -1) {
perror(pname);
if (!saw_mhz) {
cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
}
return;
}
double bogo_clock = 1.0;
bool saw_bogo = false;
long max_cpu_id = 0;
int num_cpus = 0;
line[0] = line[1] = '\0';
size_t chars_read = 0;
do { // we'll exit when the last read didn't read anything
// Move the next line to the beginning of the buffer
const size_t oldlinelen = strlen(line);
if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line
line[0] = '\0';
else // still other lines left to save
memmove(line, line + oldlinelen + 1, sizeof(line) - (oldlinelen + 1));
// Terminate the new line, reading more if we can't find the newline
char* newline = strchr(line, '\n');
if (newline == nullptr) {
const size_t linelen = strlen(line);
const size_t bytes_to_read = sizeof(line) - 1 - linelen;
CHECK(bytes_to_read > 0); // because the memmove recovered >=1 bytes
chars_read = read(fd, line + linelen, bytes_to_read);
line[linelen + chars_read] = '\0';
newline = strchr(line, '\n');
}
if (newline != nullptr) *newline = '\0';
// When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
// accept postive values. Some environments (virtual machines) report zero,
// which would cause infinite looping in WallTime_Init.
if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz") - 1) == 0) {
const char* freqstr = strchr(line, ':');
if (freqstr) {
cpuinfo_cycles_per_second = strtod(freqstr + 1, &err) * 1000000.0;
if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0)
saw_mhz = true;
}
} else if (strncasecmp(line, "bogomips", sizeof("bogomips") - 1) == 0) {
const char* freqstr = strchr(line, ':');
if (freqstr) {
bogo_clock = strtod(freqstr + 1, &err) * 1000000.0;
if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0)
saw_bogo = true;
}
} else if (strncmp(line, "processor", sizeof("processor") - 1) == 0) {
// The above comparison is case-sensitive because ARM kernels often
// include a "Processor" line that tells you about the CPU, distinct
// from the usual "processor" lines that give you CPU ids. No current
// Linux architecture is using "Processor" for CPU ids.
num_cpus++; // count up every time we see an "processor :" entry
const char* id_str = strchr(line, ':');
if (id_str) {
const long cpu_id = strtol(id_str + 1, &err, 10);
if (id_str[1] != '\0' && *err == '\0' && max_cpu_id < cpu_id)
max_cpu_id = cpu_id;
}
}
} while (chars_read > 0);
close(fd);
if (!saw_mhz) {
if (saw_bogo) {
// If we didn't find anything better, we'll use bogomips, but
// we're not happy about it.
cpuinfo_cycles_per_second = bogo_clock;
} else {
// If we don't even have bogomips, we'll use the slow estimation.
cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
}
}
if (num_cpus == 0) {
fprintf(stderr, "Failed to read num. CPUs correctly from /proc/cpuinfo\n");
} else {
if ((max_cpu_id + 1) != num_cpus) {
fprintf(stderr,
"CPU ID assignments in /proc/cpuinfo seem messed up."
" This is usually caused by a bad BIOS.\n");
}
cpuinfo_num_cpus = num_cpus;
}
#elif defined BENCHMARK_OS_FREEBSD
// For this sysctl to work, the machine must be configured without
// SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0
// and later. Before that, it's a 32-bit quantity (and gives the
// wrong answer on machines faster than 2^32 Hz). See
// http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html
// But also compare FreeBSD 7.0:
// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223
// 231 error = sysctl_handle_quad(oidp, &freq, 0, req);
// To FreeBSD 6.3 (it's the same in 6-STABLE):
// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131
// 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req);
#if __FreeBSD__ >= 7
uint64_t hz = 0;
#else
unsigned int hz = 0;
#endif
size_t sz = sizeof(hz);
const char* sysctl_path = "machdep.tsc_freq";
if (sysctlbyname(sysctl_path, &hz, &sz, nullptr, 0) != 0) {
fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
sysctl_path, strerror(errno));
cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
} else {
cpuinfo_cycles_per_second = hz;
}
// TODO: also figure out cpuinfo_num_cpus
#elif defined BENCHMARK_OS_WINDOWS
// In NT, read MHz from the registry. If we fail to do so or we're in win9x
// then make a crude estimate.
DWORD data, data_size = sizeof(data);
if (IsWindowsXPOrGreater() &&
SUCCEEDED(
SHGetValueA(HKEY_LOCAL_MACHINE,
"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
"~MHz", nullptr, &data, &data_size)))
cpuinfo_cycles_per_second = static_cast<double>((int64_t)data * (int64_t)(1000 * 1000)); // was mhz
else
cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
// TODO: also figure out cpuinfo_num_cpus
#elif defined BENCHMARK_OS_MACOSX
// returning "mach time units" per second. the current number of elapsed
// mach time units can be found by calling uint64 mach_absolute_time();
// while not as precise as actual CPU cycles, it is accurate in the face
// of CPU frequency scaling and multi-cpu/core machines.
// Our mac users have these types of machines, and accuracy
// (i.e. correctness) trumps precision.
// See cycleclock.h: CycleClock::Now(), which returns number of mach time
// units on Mac OS X.
mach_timebase_info_data_t timebase_info;
mach_timebase_info(&timebase_info);
double mach_time_units_per_nanosecond =
static_cast<double>(timebase_info.denom) /
static_cast<double>(timebase_info.numer);
cpuinfo_cycles_per_second = mach_time_units_per_nanosecond * 1e9;
int num_cpus = 0;
size_t size = sizeof(num_cpus);
int numcpus_name[] = {CTL_HW, HW_NCPU};
if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, nullptr, 0) ==
0 &&
(size == sizeof(num_cpus)))
cpuinfo_num_cpus = num_cpus;
#else
// Generic cycles per second counter
cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
#endif
}
} // end namespace
// getrusage() based implementation of MyCPUUsage
static double MyCPUUsageRUsage() {
#ifndef BENCHMARK_OS_WINDOWS
struct rusage ru;
if (getrusage(RUSAGE_SELF, &ru) == 0) {
return (static_cast<double>(ru.ru_utime.tv_sec) +
static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
static_cast<double>(ru.ru_stime.tv_sec) +
static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
} else {
return 0.0;
}
#else
HANDLE proc = GetCurrentProcess();
FILETIME creation_time;
FILETIME exit_time;
FILETIME kernel_time;
FILETIME user_time;
ULARGE_INTEGER kernel;
ULARGE_INTEGER user;
GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time, &user_time);
kernel.HighPart = kernel_time.dwHighDateTime;
kernel.LowPart = kernel_time.dwLowDateTime;
user.HighPart = user_time.dwHighDateTime;
user.LowPart = user_time.dwLowDateTime;
return (static_cast<double>(kernel.QuadPart) +
static_cast<double>(user.QuadPart)) * 1e-7;
#endif // OS_WINDOWS
}
#ifndef BENCHMARK_OS_WINDOWS
static bool MyCPUUsageCPUTimeNsLocked(double* cputime) {
static int cputime_fd = -1;
if (cputime_fd == -1) {
cputime_fd = open("/proc/self/cputime_ns", O_RDONLY);
if (cputime_fd < 0) {
cputime_fd = -1;
return false;
}
}
char buff[64];
memset(buff, 0, sizeof(buff));
if (pread(cputime_fd, buff, sizeof(buff) - 1, 0) <= 0) {
close(cputime_fd);
cputime_fd = -1;
return false;
}
unsigned long long result = strtoull(buff, nullptr, 0);
if (result == (std::numeric_limits<unsigned long long>::max)()) {
close(cputime_fd);
cputime_fd = -1;
return false;
}
*cputime = static_cast<double>(result) / 1e9;
return true;
}
#endif // OS_WINDOWS
double MyCPUUsage() {
#ifndef BENCHMARK_OS_WINDOWS
{
std::lock_guard<std::mutex> l(cputimens_mutex);
static bool use_cputime_ns = true;
if (use_cputime_ns) {
double value;
if (MyCPUUsageCPUTimeNsLocked(&value)) {
return value;
}
// Once MyCPUUsageCPUTimeNsLocked fails once fall back to getrusage().
VLOG(1) << "Reading /proc/self/cputime_ns failed. Using getrusage().\n";
use_cputime_ns = false;
}
}
#endif // OS_WINDOWS
return MyCPUUsageRUsage();
}
double ChildrenCPUUsage() {
#ifndef BENCHMARK_OS_WINDOWS
struct rusage ru;
if (getrusage(RUSAGE_CHILDREN, &ru) == 0) {
return (static_cast<double>(ru.ru_utime.tv_sec) +
static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
static_cast<double>(ru.ru_stime.tv_sec) +
static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
} else {
return 0.0;
}
#else
// TODO: Not sure what this even means on Windows
return 0.0;
#endif // OS_WINDOWS
}
double CyclesPerSecond(void) {
std::call_once(cpuinfo_init, InitializeSystemInfo);
return cpuinfo_cycles_per_second;
}
int NumCPUs(void) {
std::call_once(cpuinfo_init, InitializeSystemInfo);
return cpuinfo_num_cpus;
}
// The ""'s catch people who don't pass in a literal for "str"
#define strliterallen(str) (sizeof("" str "") - 1)
// Must use a string literal for prefix.
#define memprefix(str, len, prefix) \
((((len) >= strliterallen(prefix)) && \
std::memcmp(str, prefix, strliterallen(prefix)) == 0) \
? str + strliterallen(prefix) \
: nullptr)
bool CpuScalingEnabled() {
#ifndef BENCHMARK_OS_WINDOWS
// On Linux, the CPUfreq subsystem exposes CPU information as files on the
// local file system. If reading the exported files fails, then we may not be
// running on Linux, so we silently ignore all the read errors.
for (int cpu = 0, num_cpus = NumCPUs(); cpu < num_cpus; ++cpu) {
std::string governor_file = StrCat("/sys/devices/system/cpu/cpu", cpu,
"/cpufreq/scaling_governor");
FILE* file = fopen(governor_file.c_str(), "r");
if (!file) break;
char buff[16];
size_t bytes_read = fread(buff, 1, sizeof(buff), file);
fclose(file);
if (memprefix(buff, bytes_read, "performance") == nullptr) return true;
}
#endif
return false;
}
} // end namespace benchmark