Bug 39129: Speeding up partition_point/lower_bound/upper_bound/ by using unsigned division by 2 when possible.
Patch by Denis Yaroshevskiy (denis.yaroshevskij@gmail.com) The rational and measurements can be found in the bug description: https://bugs.llvm.org/show_bug.cgi?id=39129 Reviewed as https://reviews.llvm.org/D52697 git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@345525 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -58,5 +58,69 @@ BENCHMARK_CAPTURE(BM_Sort, sorted_descending_strings,
|
|||||||
BENCHMARK_CAPTURE(BM_Sort, single_element_strings,
|
BENCHMARK_CAPTURE(BM_Sort, single_element_strings,
|
||||||
getDuplicateStringInputs)->Arg(TestNumInputs);
|
getDuplicateStringInputs)->Arg(TestNumInputs);
|
||||||
|
|
||||||
|
template <typename GenInputs, typename Alg>
|
||||||
|
void do_binary_search_benchmark(benchmark::State& st, GenInputs gen, Alg alg)
|
||||||
|
{
|
||||||
|
using ValueType = typename decltype(gen(0))::value_type;
|
||||||
|
auto in = gen(st.range(0));
|
||||||
|
std::sort(in.begin(), in.end());
|
||||||
|
|
||||||
|
const auto every_10_percentile = [&]() -> std::vector<ValueType*> {
|
||||||
|
size_t step = in.size() / 10;
|
||||||
|
|
||||||
|
if (step == 0) {
|
||||||
|
st.SkipWithError("Input doesn't contain enough elements");
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<ValueType*> res;
|
||||||
|
for (size_t i = 0; i < in.size(); i += step)
|
||||||
|
res.push_back(&in[i]);
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}();
|
||||||
|
|
||||||
|
for (auto _ : st)
|
||||||
|
{
|
||||||
|
for (auto* test : every_10_percentile)
|
||||||
|
benchmark::DoNotOptimize(alg(in.begin(), in.end(), *test));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename GenInputs>
|
||||||
|
void BM_LowerBound(benchmark::State& st, GenInputs gen)
|
||||||
|
{
|
||||||
|
do_binary_search_benchmark(st, gen, [](auto f, auto l, const auto& v) {
|
||||||
|
return std::lower_bound(f, l, v);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
BENCHMARK_CAPTURE(BM_LowerBound, random_int32, getRandomIntegerInputs<int32_t>)
|
||||||
|
->Arg(TestNumInputs) // Small int32_t vector
|
||||||
|
->Arg(TestNumInputs * TestNumInputs); // Big int32_t vector
|
||||||
|
|
||||||
|
BENCHMARK_CAPTURE(BM_LowerBound, random_int64, getRandomIntegerInputs<int64_t>)
|
||||||
|
->Arg(TestNumInputs); // Small int64_t vector. Should also represent pointers.
|
||||||
|
|
||||||
|
BENCHMARK_CAPTURE(BM_LowerBound, random_strings, getRandomStringInputs)
|
||||||
|
->Arg(TestNumInputs); // Small string vector. What happens if the comparison is not very cheap.
|
||||||
|
|
||||||
|
template <typename GenInputs>
|
||||||
|
void BM_EqualRange(benchmark::State& st, GenInputs gen)
|
||||||
|
{
|
||||||
|
do_binary_search_benchmark(st, gen, [](auto f, auto l, const auto& v) {
|
||||||
|
return std::equal_range(f, l, v);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
BENCHMARK_CAPTURE(BM_EqualRange, random_int32, getRandomIntegerInputs<int32_t>)
|
||||||
|
->Arg(TestNumInputs) // Small int32_t vector
|
||||||
|
->Arg(TestNumInputs * TestNumInputs); // Big int32_t vector
|
||||||
|
|
||||||
|
BENCHMARK_CAPTURE(BM_EqualRange, random_int64, getRandomIntegerInputs<int64_t>)
|
||||||
|
->Arg(TestNumInputs); // Small int64_t vector. Should also represent pointers.
|
||||||
|
|
||||||
|
BENCHMARK_CAPTURE(BM_EqualRange, random_strings, getRandomStringInputs)
|
||||||
|
->Arg(TestNumInputs); // Small string vector. What happens if the comparison is not very cheap.
|
||||||
|
|
||||||
BENCHMARK_MAIN();
|
BENCHMARK_MAIN();
|
||||||
|
|||||||
@@ -750,6 +750,32 @@ public:
|
|||||||
bool operator()(const _T1& __x, const _T2& __y) {return __p_(__y, __x);}
|
bool operator()(const _T1& __x, const _T2& __y) {return __p_(__y, __x);}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Perform division by two quickly for positive integers (llvm.org/PR39129)
|
||||||
|
|
||||||
|
template <typename _Integral>
|
||||||
|
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR
|
||||||
|
typename enable_if
|
||||||
|
<
|
||||||
|
is_integral<_Integral>::value,
|
||||||
|
_Integral
|
||||||
|
>::type
|
||||||
|
__half_positive(_Integral __value)
|
||||||
|
{
|
||||||
|
return static_cast<_Integral>(static_cast<typename make_unsigned<_Integral>::type>(__value) / 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename _Tp>
|
||||||
|
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR
|
||||||
|
typename enable_if
|
||||||
|
<
|
||||||
|
!is_integral<_Tp>::value,
|
||||||
|
_Tp
|
||||||
|
>::type
|
||||||
|
__half_positive(_Tp __value)
|
||||||
|
{
|
||||||
|
return __value / 2;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef _LIBCPP_DEBUG
|
#ifdef _LIBCPP_DEBUG
|
||||||
|
|
||||||
template <class _Compare>
|
template <class _Compare>
|
||||||
@@ -3202,7 +3228,7 @@ partition_point(_ForwardIterator __first, _ForwardIterator __last, _Predicate __
|
|||||||
difference_type __len = _VSTD::distance(__first, __last);
|
difference_type __len = _VSTD::distance(__first, __last);
|
||||||
while (__len != 0)
|
while (__len != 0)
|
||||||
{
|
{
|
||||||
difference_type __l2 = __len / 2;
|
difference_type __l2 = _VSTD::__half_positive(__len);
|
||||||
_ForwardIterator __m = __first;
|
_ForwardIterator __m = __first;
|
||||||
_VSTD::advance(__m, __l2);
|
_VSTD::advance(__m, __l2);
|
||||||
if (__pred(*__m))
|
if (__pred(*__m))
|
||||||
@@ -4069,7 +4095,7 @@ __lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __va
|
|||||||
difference_type __len = _VSTD::distance(__first, __last);
|
difference_type __len = _VSTD::distance(__first, __last);
|
||||||
while (__len != 0)
|
while (__len != 0)
|
||||||
{
|
{
|
||||||
difference_type __l2 = __len / 2;
|
difference_type __l2 = _VSTD::__half_positive(__len);
|
||||||
_ForwardIterator __m = __first;
|
_ForwardIterator __m = __first;
|
||||||
_VSTD::advance(__m, __l2);
|
_VSTD::advance(__m, __l2);
|
||||||
if (__comp(*__m, __value_))
|
if (__comp(*__m, __value_))
|
||||||
@@ -4111,7 +4137,7 @@ __upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __va
|
|||||||
difference_type __len = _VSTD::distance(__first, __last);
|
difference_type __len = _VSTD::distance(__first, __last);
|
||||||
while (__len != 0)
|
while (__len != 0)
|
||||||
{
|
{
|
||||||
difference_type __l2 = __len / 2;
|
difference_type __l2 = _VSTD::__half_positive(__len);
|
||||||
_ForwardIterator __m = __first;
|
_ForwardIterator __m = __first;
|
||||||
_VSTD::advance(__m, __l2);
|
_VSTD::advance(__m, __l2);
|
||||||
if (__comp(__value_, *__m))
|
if (__comp(__value_, *__m))
|
||||||
@@ -4153,7 +4179,7 @@ __equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __va
|
|||||||
difference_type __len = _VSTD::distance(__first, __last);
|
difference_type __len = _VSTD::distance(__first, __last);
|
||||||
while (__len != 0)
|
while (__len != 0)
|
||||||
{
|
{
|
||||||
difference_type __l2 = __len / 2;
|
difference_type __l2 = _VSTD::__half_positive(__len);
|
||||||
_ForwardIterator __m = __first;
|
_ForwardIterator __m = __first;
|
||||||
_VSTD::advance(__m, __l2);
|
_VSTD::advance(__m, __l2);
|
||||||
if (__comp(*__m, __value_))
|
if (__comp(*__m, __value_))
|
||||||
|
|||||||
59
test/libcxx/algorithms/half_positive.pass.cpp
Normal file
59
test/libcxx/algorithms/half_positive.pass.cpp
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is dual licensed under the MIT and the University of Illinois Open
|
||||||
|
// Source Licenses. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// <algorithm>
|
||||||
|
|
||||||
|
// template <typename _Tp> _Tp __half_positive(const _Tp&);
|
||||||
|
|
||||||
|
// __half_positive divide integer number by 2 as unsigned number
|
||||||
|
// if it's safe to do so. It can be an important optimization for lower bound,
|
||||||
|
// for example.
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cassert>
|
||||||
|
#include <limits>
|
||||||
|
#include <type_traits>
|
||||||
|
|
||||||
|
#include "test_macros.h"
|
||||||
|
#include "user_defined_integral.hpp"
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
template <class IntType, class UnderlyingType = IntType>
|
||||||
|
TEST_CONSTEXPR bool test(IntType max_v = IntType(std::numeric_limits<UnderlyingType>::max())) {
|
||||||
|
return std::__half_positive(max_v) == max_v / 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
{
|
||||||
|
assert(test<char>());
|
||||||
|
assert(test<int>());
|
||||||
|
assert(test<long>());
|
||||||
|
assert((test<UserDefinedIntegral<int>, int>()));
|
||||||
|
assert(test<size_t>());
|
||||||
|
#if !defined(_LIBCPP_HAS_NO_INT128)
|
||||||
|
assert(test<__int128_t>());
|
||||||
|
#endif // !defined(_LIBCPP_HAS_NO_INT128)
|
||||||
|
}
|
||||||
|
|
||||||
|
#if TEST_STD_VER >= 11
|
||||||
|
{
|
||||||
|
static_assert(test<char>(), "");
|
||||||
|
static_assert(test<int>(), "");
|
||||||
|
static_assert(test<long>(), "");
|
||||||
|
static_assert(test<size_t>(), "");
|
||||||
|
#if !defined(_LIBCPP_HAS_NO_INT128)
|
||||||
|
static_assert(test<__int128_t>(), "");
|
||||||
|
#endif // !defined(_LIBCPP_HAS_NO_INT128)
|
||||||
|
}
|
||||||
|
#endif // TEST_STD_VER >= 11
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user