mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-17 18:16:42 +00:00
[libc++] Optimize ranges::swap_ranges for vector<bool>::iterator (#121150)
This PR optimizes the performance of `std::ranges::swap_ranges` for `vector<bool>::iterator`, addressing a subtask outlined in issue #64038. The optimizations yield performance improvements of up to **611x** for aligned range swaps and **78x** for unaligned range swaps. Additionally, comprehensive tests covering up to 4 storage words (256 bits) with odd and even bit sizes are provided, which validate the proposed optimizations in this patch.
This commit is contained in:
parent
b08769c3ec
commit
a12744ff05
@ -50,6 +50,9 @@ Improvements and New Features
|
||||
- The ``std::ranges::equal`` algorithm has been optimized for ``std::vector<bool>::iterator``, resulting in a performance
|
||||
improvement of up to 188x.
|
||||
|
||||
- The ``std::ranges::swap_ranges`` algorithm has been optimized for ``std::vector<bool>::iterator``, resulting in a
|
||||
performance improvement of up to 611x.
|
||||
|
||||
- Updated formatting library to Unicode 16.0.0.
|
||||
|
||||
Deprecations and Removals
|
||||
|
@ -10,9 +10,12 @@
|
||||
#define _LIBCPP___ALGORITHM_SWAP_RANGES_H
|
||||
|
||||
#include <__algorithm/iterator_operations.h>
|
||||
#include <__algorithm/min.h>
|
||||
#include <__config>
|
||||
#include <__fwd/bit_reference.h>
|
||||
#include <__utility/move.h>
|
||||
#include <__utility/pair.h>
|
||||
#include <__utility/swap.h>
|
||||
|
||||
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
|
||||
# pragma GCC system_header
|
||||
@ -23,6 +26,165 @@ _LIBCPP_PUSH_MACROS
|
||||
|
||||
_LIBCPP_BEGIN_NAMESPACE_STD
|
||||
|
||||
template <class _Cl, class _Cr>
|
||||
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cr, false> __swap_ranges_aligned(
|
||||
__bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
|
||||
using _I1 = __bit_iterator<_Cl, false>;
|
||||
using difference_type = typename _I1::difference_type;
|
||||
using __storage_type = typename _I1::__storage_type;
|
||||
|
||||
const int __bits_per_word = _I1::__bits_per_word;
|
||||
difference_type __n = __last - __first;
|
||||
if (__n > 0) {
|
||||
// do first word
|
||||
if (__first.__ctz_ != 0) {
|
||||
unsigned __clz = __bits_per_word - __first.__ctz_;
|
||||
difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
|
||||
__n -= __dn;
|
||||
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
|
||||
__storage_type __b1 = *__first.__seg_ & __m;
|
||||
*__first.__seg_ &= ~__m;
|
||||
__storage_type __b2 = *__result.__seg_ & __m;
|
||||
*__result.__seg_ &= ~__m;
|
||||
*__result.__seg_ |= __b1;
|
||||
*__first.__seg_ |= __b2;
|
||||
__result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
|
||||
__result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
|
||||
++__first.__seg_;
|
||||
// __first.__ctz_ = 0;
|
||||
}
|
||||
// __first.__ctz_ == 0;
|
||||
// do middle words
|
||||
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_, ++__result.__seg_)
|
||||
swap(*__first.__seg_, *__result.__seg_);
|
||||
// do last word
|
||||
if (__n > 0) {
|
||||
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
|
||||
__storage_type __b1 = *__first.__seg_ & __m;
|
||||
*__first.__seg_ &= ~__m;
|
||||
__storage_type __b2 = *__result.__seg_ & __m;
|
||||
*__result.__seg_ &= ~__m;
|
||||
*__result.__seg_ |= __b1;
|
||||
*__first.__seg_ |= __b2;
|
||||
__result.__ctz_ = static_cast<unsigned>(__n);
|
||||
}
|
||||
}
|
||||
return __result;
|
||||
}
|
||||
|
||||
template <class _Cl, class _Cr>
|
||||
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cr, false> __swap_ranges_unaligned(
|
||||
__bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
|
||||
using _I1 = __bit_iterator<_Cl, false>;
|
||||
using difference_type = typename _I1::difference_type;
|
||||
using __storage_type = typename _I1::__storage_type;
|
||||
|
||||
const int __bits_per_word = _I1::__bits_per_word;
|
||||
difference_type __n = __last - __first;
|
||||
if (__n > 0) {
|
||||
// do first word
|
||||
if (__first.__ctz_ != 0) {
|
||||
unsigned __clz_f = __bits_per_word - __first.__ctz_;
|
||||
difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
|
||||
__n -= __dn;
|
||||
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
|
||||
__storage_type __b1 = *__first.__seg_ & __m;
|
||||
*__first.__seg_ &= ~__m;
|
||||
unsigned __clz_r = __bits_per_word - __result.__ctz_;
|
||||
__storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
|
||||
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
|
||||
__storage_type __b2 = *__result.__seg_ & __m;
|
||||
*__result.__seg_ &= ~__m;
|
||||
if (__result.__ctz_ > __first.__ctz_) {
|
||||
unsigned __s = __result.__ctz_ - __first.__ctz_;
|
||||
*__result.__seg_ |= __b1 << __s;
|
||||
*__first.__seg_ |= __b2 >> __s;
|
||||
} else {
|
||||
unsigned __s = __first.__ctz_ - __result.__ctz_;
|
||||
*__result.__seg_ |= __b1 >> __s;
|
||||
*__first.__seg_ |= __b2 << __s;
|
||||
}
|
||||
__result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word;
|
||||
__result.__ctz_ = static_cast<unsigned>((__ddn + __result.__ctz_) % __bits_per_word);
|
||||
__dn -= __ddn;
|
||||
if (__dn > 0) {
|
||||
__m = ~__storage_type(0) >> (__bits_per_word - __dn);
|
||||
__b2 = *__result.__seg_ & __m;
|
||||
*__result.__seg_ &= ~__m;
|
||||
unsigned __s = __first.__ctz_ + __ddn;
|
||||
*__result.__seg_ |= __b1 >> __s;
|
||||
*__first.__seg_ |= __b2 << __s;
|
||||
__result.__ctz_ = static_cast<unsigned>(__dn);
|
||||
}
|
||||
++__first.__seg_;
|
||||
// __first.__ctz_ = 0;
|
||||
}
|
||||
// __first.__ctz_ == 0;
|
||||
// do middle words
|
||||
__storage_type __m = ~__storage_type(0) << __result.__ctz_;
|
||||
unsigned __clz_r = __bits_per_word - __result.__ctz_;
|
||||
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) {
|
||||
__storage_type __b1 = *__first.__seg_;
|
||||
__storage_type __b2 = *__result.__seg_ & __m;
|
||||
*__result.__seg_ &= ~__m;
|
||||
*__result.__seg_ |= __b1 << __result.__ctz_;
|
||||
*__first.__seg_ = __b2 >> __result.__ctz_;
|
||||
++__result.__seg_;
|
||||
__b2 = *__result.__seg_ & ~__m;
|
||||
*__result.__seg_ &= __m;
|
||||
*__result.__seg_ |= __b1 >> __clz_r;
|
||||
*__first.__seg_ |= __b2 << __clz_r;
|
||||
}
|
||||
// do last word
|
||||
if (__n > 0) {
|
||||
__m = ~__storage_type(0) >> (__bits_per_word - __n);
|
||||
__storage_type __b1 = *__first.__seg_ & __m;
|
||||
*__first.__seg_ &= ~__m;
|
||||
__storage_type __dn = std::min<__storage_type>(__n, __clz_r);
|
||||
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
|
||||
__storage_type __b2 = *__result.__seg_ & __m;
|
||||
*__result.__seg_ &= ~__m;
|
||||
*__result.__seg_ |= __b1 << __result.__ctz_;
|
||||
*__first.__seg_ |= __b2 >> __result.__ctz_;
|
||||
__result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
|
||||
__result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
|
||||
__n -= __dn;
|
||||
if (__n > 0) {
|
||||
__m = ~__storage_type(0) >> (__bits_per_word - __n);
|
||||
__b2 = *__result.__seg_ & __m;
|
||||
*__result.__seg_ &= ~__m;
|
||||
*__result.__seg_ |= __b1 >> __dn;
|
||||
*__first.__seg_ |= __b2 << __dn;
|
||||
__result.__ctz_ = static_cast<unsigned>(__n);
|
||||
}
|
||||
}
|
||||
}
|
||||
return __result;
|
||||
}
|
||||
|
||||
// 2+1 iterators: size2 >= size1; used by std::swap_ranges.
|
||||
template <class, class _Cl, class _Cr>
|
||||
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
|
||||
__swap_ranges(__bit_iterator<_Cl, false> __first1,
|
||||
__bit_iterator<_Cl, false> __last1,
|
||||
__bit_iterator<_Cr, false> __first2) {
|
||||
if (__first1.__ctz_ == __first2.__ctz_)
|
||||
return std::make_pair(__last1, std::__swap_ranges_aligned(__first1, __last1, __first2));
|
||||
return std::make_pair(__last1, std::__swap_ranges_unaligned(__first1, __last1, __first2));
|
||||
}
|
||||
|
||||
// 2+2 iterators: used by std::ranges::swap_ranges.
|
||||
template <class _AlgPolicy, class _Cl, class _Cr>
|
||||
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
|
||||
__swap_ranges(__bit_iterator<_Cl, false> __first1,
|
||||
__bit_iterator<_Cl, false> __last1,
|
||||
__bit_iterator<_Cr, false> __first2,
|
||||
__bit_iterator<_Cr, false> __last2) {
|
||||
if (__last1 - __first1 < __last2 - __first2)
|
||||
return std::make_pair(__last1, std::__swap_ranges<_AlgPolicy>(__first1, __last1, __first2).second);
|
||||
return std::make_pair(std::__swap_ranges<_AlgPolicy>(__first2, __last2, __first1).second, __last2);
|
||||
}
|
||||
|
||||
// 2+2 iterators: the shorter size will be used.
|
||||
template <class _AlgPolicy, class _ForwardIterator1, class _Sentinel1, class _ForwardIterator2, class _Sentinel2>
|
||||
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator1, _ForwardIterator2>
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include <__algorithm/copy_n.h>
|
||||
#include <__algorithm/equal.h>
|
||||
#include <__algorithm/min.h>
|
||||
#include <__algorithm/swap_ranges.h>
|
||||
#include <__assert>
|
||||
#include <__bit/countr.h>
|
||||
#include <__compare/ordering.h>
|
||||
@ -215,152 +216,6 @@ private:
|
||||
__mask_(__m) {}
|
||||
};
|
||||
|
||||
// swap_ranges
|
||||
|
||||
template <class _Cl, class _Cr>
|
||||
_LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> __swap_ranges_aligned(
|
||||
__bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
|
||||
using _I1 = __bit_iterator<_Cl, false>;
|
||||
using difference_type = typename _I1::difference_type;
|
||||
using __storage_type = typename _I1::__storage_type;
|
||||
|
||||
const int __bits_per_word = _I1::__bits_per_word;
|
||||
difference_type __n = __last - __first;
|
||||
if (__n > 0) {
|
||||
// do first word
|
||||
if (__first.__ctz_ != 0) {
|
||||
unsigned __clz = __bits_per_word - __first.__ctz_;
|
||||
difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
|
||||
__n -= __dn;
|
||||
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
|
||||
__storage_type __b1 = *__first.__seg_ & __m;
|
||||
*__first.__seg_ &= ~__m;
|
||||
__storage_type __b2 = *__result.__seg_ & __m;
|
||||
*__result.__seg_ &= ~__m;
|
||||
*__result.__seg_ |= __b1;
|
||||
*__first.__seg_ |= __b2;
|
||||
__result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
|
||||
__result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
|
||||
++__first.__seg_;
|
||||
// __first.__ctz_ = 0;
|
||||
}
|
||||
// __first.__ctz_ == 0;
|
||||
// do middle words
|
||||
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_, ++__result.__seg_)
|
||||
swap(*__first.__seg_, *__result.__seg_);
|
||||
// do last word
|
||||
if (__n > 0) {
|
||||
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
|
||||
__storage_type __b1 = *__first.__seg_ & __m;
|
||||
*__first.__seg_ &= ~__m;
|
||||
__storage_type __b2 = *__result.__seg_ & __m;
|
||||
*__result.__seg_ &= ~__m;
|
||||
*__result.__seg_ |= __b1;
|
||||
*__first.__seg_ |= __b2;
|
||||
__result.__ctz_ = static_cast<unsigned>(__n);
|
||||
}
|
||||
}
|
||||
return __result;
|
||||
}
|
||||
|
||||
template <class _Cl, class _Cr>
|
||||
_LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> __swap_ranges_unaligned(
|
||||
__bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
|
||||
using _I1 = __bit_iterator<_Cl, false>;
|
||||
using difference_type = typename _I1::difference_type;
|
||||
using __storage_type = typename _I1::__storage_type;
|
||||
|
||||
const int __bits_per_word = _I1::__bits_per_word;
|
||||
difference_type __n = __last - __first;
|
||||
if (__n > 0) {
|
||||
// do first word
|
||||
if (__first.__ctz_ != 0) {
|
||||
unsigned __clz_f = __bits_per_word - __first.__ctz_;
|
||||
difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
|
||||
__n -= __dn;
|
||||
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
|
||||
__storage_type __b1 = *__first.__seg_ & __m;
|
||||
*__first.__seg_ &= ~__m;
|
||||
unsigned __clz_r = __bits_per_word - __result.__ctz_;
|
||||
__storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
|
||||
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
|
||||
__storage_type __b2 = *__result.__seg_ & __m;
|
||||
*__result.__seg_ &= ~__m;
|
||||
if (__result.__ctz_ > __first.__ctz_) {
|
||||
unsigned __s = __result.__ctz_ - __first.__ctz_;
|
||||
*__result.__seg_ |= __b1 << __s;
|
||||
*__first.__seg_ |= __b2 >> __s;
|
||||
} else {
|
||||
unsigned __s = __first.__ctz_ - __result.__ctz_;
|
||||
*__result.__seg_ |= __b1 >> __s;
|
||||
*__first.__seg_ |= __b2 << __s;
|
||||
}
|
||||
__result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word;
|
||||
__result.__ctz_ = static_cast<unsigned>((__ddn + __result.__ctz_) % __bits_per_word);
|
||||
__dn -= __ddn;
|
||||
if (__dn > 0) {
|
||||
__m = ~__storage_type(0) >> (__bits_per_word - __dn);
|
||||
__b2 = *__result.__seg_ & __m;
|
||||
*__result.__seg_ &= ~__m;
|
||||
unsigned __s = __first.__ctz_ + __ddn;
|
||||
*__result.__seg_ |= __b1 >> __s;
|
||||
*__first.__seg_ |= __b2 << __s;
|
||||
__result.__ctz_ = static_cast<unsigned>(__dn);
|
||||
}
|
||||
++__first.__seg_;
|
||||
// __first.__ctz_ = 0;
|
||||
}
|
||||
// __first.__ctz_ == 0;
|
||||
// do middle words
|
||||
__storage_type __m = ~__storage_type(0) << __result.__ctz_;
|
||||
unsigned __clz_r = __bits_per_word - __result.__ctz_;
|
||||
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) {
|
||||
__storage_type __b1 = *__first.__seg_;
|
||||
__storage_type __b2 = *__result.__seg_ & __m;
|
||||
*__result.__seg_ &= ~__m;
|
||||
*__result.__seg_ |= __b1 << __result.__ctz_;
|
||||
*__first.__seg_ = __b2 >> __result.__ctz_;
|
||||
++__result.__seg_;
|
||||
__b2 = *__result.__seg_ & ~__m;
|
||||
*__result.__seg_ &= __m;
|
||||
*__result.__seg_ |= __b1 >> __clz_r;
|
||||
*__first.__seg_ |= __b2 << __clz_r;
|
||||
}
|
||||
// do last word
|
||||
if (__n > 0) {
|
||||
__m = ~__storage_type(0) >> (__bits_per_word - __n);
|
||||
__storage_type __b1 = *__first.__seg_ & __m;
|
||||
*__first.__seg_ &= ~__m;
|
||||
__storage_type __dn = std::min<__storage_type>(__n, __clz_r);
|
||||
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
|
||||
__storage_type __b2 = *__result.__seg_ & __m;
|
||||
*__result.__seg_ &= ~__m;
|
||||
*__result.__seg_ |= __b1 << __result.__ctz_;
|
||||
*__first.__seg_ |= __b2 >> __result.__ctz_;
|
||||
__result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
|
||||
__result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
|
||||
__n -= __dn;
|
||||
if (__n > 0) {
|
||||
__m = ~__storage_type(0) >> (__bits_per_word - __n);
|
||||
__b2 = *__result.__seg_ & __m;
|
||||
*__result.__seg_ &= ~__m;
|
||||
*__result.__seg_ |= __b1 >> __dn;
|
||||
*__first.__seg_ |= __b2 << __dn;
|
||||
__result.__ctz_ = static_cast<unsigned>(__n);
|
||||
}
|
||||
}
|
||||
}
|
||||
return __result;
|
||||
}
|
||||
|
||||
template <class _Cl, class _Cr>
|
||||
inline _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> swap_ranges(
|
||||
__bit_iterator<_Cl, false> __first1, __bit_iterator<_Cl, false> __last1, __bit_iterator<_Cr, false> __first2) {
|
||||
if (__first1.__ctz_ == __first2.__ctz_)
|
||||
return std::__swap_ranges_aligned(__first1, __last1, __first2);
|
||||
return std::__swap_ranges_unaligned(__first1, __last1, __first2);
|
||||
}
|
||||
|
||||
// rotate
|
||||
|
||||
template <class _Cp>
|
||||
@ -644,14 +499,14 @@ private:
|
||||
template <class _AlgPolicy>
|
||||
friend struct __copy_backward_impl;
|
||||
template <class _Cl, class _Cr>
|
||||
friend __bit_iterator<_Cr, false>
|
||||
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Cr, false>
|
||||
__swap_ranges_aligned(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
|
||||
template <class _Cl, class _Cr>
|
||||
friend __bit_iterator<_Cr, false>
|
||||
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Cr, false>
|
||||
__swap_ranges_unaligned(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
|
||||
template <class _Cl, class _Cr>
|
||||
friend __bit_iterator<_Cr, false>
|
||||
swap_ranges(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
|
||||
template <class, class _Cl, class _Cr>
|
||||
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
|
||||
__swap_ranges(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
|
||||
template <class _Dp>
|
||||
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false>
|
||||
rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>);
|
||||
|
66
libcxx/test/benchmarks/algorithms/swap_ranges.bench.cpp
Normal file
66
libcxx/test/benchmarks/algorithms/swap_ranges.bench.cpp
Normal file
@ -0,0 +1,66 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// UNSUPPORTED: c++03, c++11, c++14, c++17
|
||||
|
||||
#include <algorithm>
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <vector>
|
||||
|
||||
// Benchmarks std::ranges::swap_ranges on two whole vector<bool> objects of equal size, so both ranges start
// at the beginning of their storage.
static void bm_ranges_swap_ranges_vb_aligned(benchmark::State& state) {
  const auto size = state.range();
  std::vector<bool> lhs(size, true);
  std::vector<bool> rhs(size, false);
  for (auto _ : state) {
    benchmark::DoNotOptimize(std::ranges::swap_ranges(lhs, rhs));
    // Keep both containers observable so the swap itself cannot be optimized away.
    benchmark::DoNotOptimize(&lhs);
    benchmark::DoNotOptimize(&rhs);
  }
}
|
||||
|
||||
// Benchmarks std::ranges::swap_ranges when the second range starts 4 bits into its vector<bool>, so the two
// ranges begin at different bit offsets.
static void bm_ranges_swap_ranges_vb_unaligned(benchmark::State& state) {
  const auto size = state.range();
  std::vector<bool> lhs(size, true);
  std::vector<bool> rhs(size + 8, true); // 4 spare bits on each side of the swapped window
  auto lhs_first = std::ranges::begin(lhs);
  auto lhs_last  = std::ranges::end(lhs);
  auto rhs_first = std::ranges::begin(rhs) + 4;
  auto rhs_last  = std::ranges::end(rhs) - 4;
  for (auto _ : state) {
    benchmark::DoNotOptimize(std::ranges::swap_ranges(lhs_first, lhs_last, rhs_first, rhs_last));
    // Keep both containers observable so the swap itself cannot be optimized away.
    benchmark::DoNotOptimize(&lhs);
    benchmark::DoNotOptimize(&rhs);
  }
}
|
||||
|
||||
// Test std::ranges::swap_ranges for vector<bool>::iterator
// Both benchmarks are registered for element counts from 8 up to 1 << 20.
// NOTE(review): only the aligned benchmark sets RangeMultiplier(2); the unaligned one uses the library's
// default multiplier, so the two are sampled at different sizes -- confirm this is intended.
|
||||
BENCHMARK(bm_ranges_swap_ranges_vb_aligned)->RangeMultiplier(2)->Range(8, 1 << 20);
|
||||
BENCHMARK(bm_ranges_swap_ranges_vb_unaligned)->Range(8, 1 << 20);
|
||||
|
||||
// Shared driver for the std::swap_ranges benchmarks on vector<bool>. When `aligned` is false the second
// vector gets 8 extra bits and the destination starts 4 bits in, so the two ranges begin at different bit
// offsets.
static void bm_swap_ranges_vb(benchmark::State& state, bool aligned) {
  const auto size  = state.range();
  const auto size2 = aligned ? size : size + 8;
  std::vector<bool> lhs(size, true);
  std::vector<bool> rhs(size2, true);
  auto lhs_first = lhs.begin();
  auto lhs_last  = lhs.end();
  auto rhs_first = rhs.begin();
  if (!aligned)
    rhs_first += 4;
  for (auto _ : state) {
    benchmark::DoNotOptimize(std::swap_ranges(lhs_first, lhs_last, rhs_first));
    // Keep both containers observable so the swap itself cannot be optimized away.
    benchmark::DoNotOptimize(&lhs);
    benchmark::DoNotOptimize(&rhs);
  }
}
|
||||
|
||||
// std::swap_ranges benchmark with both ranges starting at the beginning of their vector<bool>.
static void bm_swap_ranges_vb_aligned(benchmark::State& state) {
  const bool aligned = true;
  bm_swap_ranges_vb(state, aligned);
}
|
||||
// std::swap_ranges benchmark with the second range starting 4 bits into its vector<bool>.
static void bm_swap_ranges_vb_unaligned(benchmark::State& state) {
  const bool aligned = false;
  bm_swap_ranges_vb(state, aligned);
}
|
||||
|
||||
// Test std::swap_ranges for vector<bool>::iterator
// Both variants are registered for element counts from 8 up to 1 << 20.
|
||||
BENCHMARK(bm_swap_ranges_vb_aligned)->Range(8, 1 << 20);
|
||||
BENCHMARK(bm_swap_ranges_vb_unaligned)->Range(8, 1 << 20);
|
||||
|
||||
BENCHMARK_MAIN();
|
@ -23,6 +23,7 @@
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <ranges>
|
||||
#include <vector>
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
@ -134,6 +135,22 @@ constexpr void test_rval_range() {
|
||||
}
|
||||
}
|
||||
|
||||
template <std::size_t N>
|
||||
constexpr void test_vector_bool() {
|
||||
{ // Test swap_ranges() with aligned bytes
|
||||
std::vector<bool> f(N, false), t(N, true);
|
||||
std::ranges::swap_ranges(f, t);
|
||||
assert(std::all_of(f.begin(), f.end(), [](bool b) { return b; }));
|
||||
assert(std::all_of(t.begin(), t.end(), [](bool b) { return !b; }));
|
||||
}
|
||||
{ // Test swap_ranges() with unaligned bytes
|
||||
std::vector<bool> f(N, false), t(N + 8, true);
|
||||
std::ranges::swap_ranges(f.begin(), f.end(), t.begin() + 4, t.end() - 4);
|
||||
assert(std::all_of(f.begin(), f.end(), [](bool b) { return b; }));
|
||||
assert(std::all_of(t.begin() + 4, t.end() - 4, [](bool b) { return !b; }));
|
||||
}
|
||||
}
|
||||
|
||||
constexpr bool test() {
|
||||
test_range();
|
||||
test_sentinel();
|
||||
@ -148,6 +165,15 @@ constexpr bool test() {
|
||||
});
|
||||
});
|
||||
|
||||
{ // Test vector<bool>::iterator optimization
|
||||
test_vector_bool<8>();
|
||||
test_vector_bool<19>();
|
||||
test_vector_bool<32>();
|
||||
test_vector_bool<49>();
|
||||
test_vector_bool<64>();
|
||||
test_vector_bool<199>();
|
||||
test_vector_bool<256>();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "test_macros.h"
|
||||
#include "test_iterators.h"
|
||||
@ -110,6 +111,23 @@ TEST_CONSTEXPR_CXX20 bool test_simple_cases() {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <std::size_t N>
|
||||
TEST_CONSTEXPR_CXX20 void test_vector_bool() {
|
||||
std::vector<bool> f(N, false), t(N, true);
|
||||
{ // Test swap_ranges() with aligned bytes
|
||||
std::vector<bool> f1 = f, t1 = t;
|
||||
std::swap_ranges(f1.begin(), f1.end(), t1.begin());
|
||||
assert(f1 == t);
|
||||
assert(t1 == f);
|
||||
}
|
||||
{ // Test swap_ranges() with unaligned bytes
|
||||
std::vector<bool> f1(N, false), t1(N + 8, true);
|
||||
std::swap_ranges(f1.begin(), f1.end(), t1.begin() + 4);
|
||||
assert(std::equal(f1.begin(), f1.end(), t.begin()));
|
||||
assert(std::equal(t1.begin() + 4, t1.end() - 4, f.begin()));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CONSTEXPR_CXX20 bool test() {
|
||||
test_simple_cases<forward_iterator, forward_iterator>();
|
||||
test_simple_cases<forward_iterator, bidirectional_iterator>();
|
||||
@ -130,6 +148,16 @@ TEST_CONSTEXPR_CXX20 bool test() {
|
||||
types::for_each(types::forward_iterator_list<std::unique_ptr<int>*>(), TestUniquePtr());
|
||||
#endif
|
||||
|
||||
{ // Test vector<bool>::iterator optimization
|
||||
test_vector_bool<8>();
|
||||
test_vector_bool<19>();
|
||||
test_vector_bool<32>();
|
||||
test_vector_bool<49>();
|
||||
test_vector_bool<64>();
|
||||
test_vector_bool<199>();
|
||||
test_vector_bool<256>();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user