Reapply "[libc++] Simplify the implementation of std::sort a bit (#104902)" (#114023)

This reverts commit ef44e4659878f2. The patch was originally reverted
because it was
deemed to introduce a performance regression for small inputs, however
it also fixed
a previous performance regression for larger inputs. So overall, this
patch is desirable.
This commit is contained in:
Nikolas Klauser 2024-10-30 11:51:55 +01:00 committed by GitHub
parent f7b5f0c805
commit 0fb76bae6b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 168 additions and 189 deletions

View File

@ -42,6 +42,9 @@ struct __less<void, void> {
}
};
template <class _Tp>
inline const bool __desugars_to_v<__less_tag, __less<>, _Tp, _Tp> = true;
template <class _Tp>
inline const bool __desugars_to_v<__totally_ordered_less_tag, __less<>, _Tp, _Tp> = is_integral<_Tp>::value;

View File

@ -89,7 +89,7 @@ struct __minmax {
// vectorize the code.
if constexpr (contiguous_range<_Range> && is_integral_v<_ValueT> &&
__is_cheap_to_copy<_ValueT> & __is_identity<_Proj>::value &&
__desugars_to_v<__totally_ordered_less_tag, _Comp, _ValueT, _ValueT>) {
__desugars_to_v<__less_tag, _Comp, _ValueT, _ValueT>) {
minmax_result<_ValueT> __result = {__r[0], __r[0]};
for (auto __e : __r) {
if (__e < __result.min)

View File

@ -27,11 +27,13 @@
#include <__functional/ranges_operations.h>
#include <__iterator/iterator_traits.h>
#include <__type_traits/conditional.h>
#include <__type_traits/desugars_to.h>
#include <__type_traits/disjunction.h>
#include <__type_traits/enable_if.h>
#include <__type_traits/is_arithmetic.h>
#include <__type_traits/is_constant_evaluated.h>
#include <__type_traits/is_same.h>
#include <__type_traits/is_trivially_copyable.h>
#include <__type_traits/remove_cvref.h>
#include <__utility/move.h>
#include <__utility/pair.h>
@ -47,49 +49,120 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _Iter, class _Tp = typename iterator_traits<_Iter>::value_type>
inline const bool __use_branchless_sort =
__libcpp_is_contiguous_iterator<_Iter>::value && __is_cheap_to_copy<_Tp> && is_arithmetic<_Tp>::value &&
(__desugars_to_v<__less_tag, __remove_cvref_t<_Compare>, _Tp, _Tp> ||
__desugars_to_v<__greater_tag, __remove_cvref_t<_Compare>, _Tp, _Tp>);
namespace __detail {
// Size in bits for the bitset in use.
enum { __block_size = sizeof(uint64_t) * 8 };
} // namespace __detail
// Ensures that __c(*__x, *__y) is true by swapping *__x and *__y if necessary.
template <class _Compare, class _RandomAccessIterator>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool
__cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) {
// Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`).
using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
bool __r = __c(*__x, *__y);
value_type __tmp = __r ? *__x : *__y;
*__y = __r ? *__y : *__x;
*__x = __tmp;
return !__r;
}
// Ensures that *__x, *__y and *__z are ordered according to the comparator __c,
// under the assumption that *__y and *__z are already ordered.
template <class _Compare, class _RandomAccessIterator>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool
__partially_sorted_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _RandomAccessIterator __z, _Compare __c) {
// Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`).
using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
bool __r1 = __c(*__z, *__x);
value_type __tmp = __r1 ? *__z : *__x;
*__z = __r1 ? *__x : *__z;
bool __r2 = __c(__tmp, *__y);
*__x = __r2 ? *__x : *__y;
*__y = __r2 ? *__y : __tmp;
return !__r1 || !__r2;
}
// stable, 2-3 compares, 0-2 swaps
template <class _AlgPolicy, class _Compare, class _ForwardIterator>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 unsigned
__sort3(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z, _Compare __c) {
template <class,
class _Compare,
class _RandomAccessIterator,
__enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool
__sort3(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) {
bool __swapped1 = std::__cond_swap<_Compare>(__x2, __x3, __c);
bool __swapped2 = std::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c);
return __swapped1 || __swapped2;
}
template <class _AlgPolicy,
class _Compare,
class _RandomAccessIterator,
__enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool
__sort3(_RandomAccessIterator __x, _RandomAccessIterator __y, _RandomAccessIterator __z, _Compare __c) {
using _Ops = _IterOps<_AlgPolicy>;
unsigned __r = 0;
if (!__c(*__y, *__x)) // if x <= y
{
if (!__c(*__z, *__y)) // if y <= z
return __r; // x <= y && y <= z
// x <= y && y > z
_Ops::iter_swap(__y, __z); // x <= z && y < z
__r = 1;
if (__c(*__y, *__x)) // if x > y
{
if (!__c(*__z, *__y)) // if y <= z
return false; // x <= y && y <= z
// x <= y && y > z
_Ops::iter_swap(__y, __z); // x <= z && y < z
if (__c(*__y, *__x)) // if x > y
_Ops::iter_swap(__x, __y); // x < y && y <= z
__r = 2;
}
return __r; // x <= y && y < z
return true; // x <= y && y < z
}
if (__c(*__z, *__y)) // x > y, if y > z
{
_Ops::iter_swap(__x, __z); // x < y && y < z
__r = 1;
return __r;
return true;
}
_Ops::iter_swap(__x, __y); // x > y && y <= z
__r = 1; // x < y && x <= z
if (__c(*__z, *__y)) // if y > z
{
// x < y && x <= z
if (__c(*__z, *__y)) // if y > z
_Ops::iter_swap(__y, __z); // x <= y && y < z
__r = 2;
}
return __r;
return true;
} // x <= y && y <= z
// stable, 3-6 compares, 0-5 swaps
template <class _AlgPolicy, class _Compare, class _ForwardIterator>
_LIBCPP_HIDE_FROM_ABI void
__sort4(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, _ForwardIterator __x4, _Compare __c) {
template <class,
class _Compare,
class _RandomAccessIterator,
__enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0>
inline _LIBCPP_HIDE_FROM_ABI void
__sort4(_RandomAccessIterator __x1,
_RandomAccessIterator __x2,
_RandomAccessIterator __x3,
_RandomAccessIterator __x4,
_Compare __c) {
std::__cond_swap<_Compare>(__x1, __x3, __c);
std::__cond_swap<_Compare>(__x2, __x4, __c);
std::__cond_swap<_Compare>(__x1, __x2, __c);
std::__cond_swap<_Compare>(__x3, __x4, __c);
std::__cond_swap<_Compare>(__x2, __x3, __c);
}
template <class _AlgPolicy,
class _Compare,
class _RandomAccessIterator,
__enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0>
inline _LIBCPP_HIDE_FROM_ABI void
__sort4(_RandomAccessIterator __x1,
_RandomAccessIterator __x2,
_RandomAccessIterator __x3,
_RandomAccessIterator __x4,
_Compare __c) {
using _Ops = _IterOps<_AlgPolicy>;
std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c);
if (__c(*__x4, *__x3)) {
@ -105,17 +178,39 @@ __sort4(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, _Fo
// stable, 4-10 compares, 0-9 swaps
template <class _AlgPolicy, class _Comp, class _ForwardIterator>
_LIBCPP_HIDE_FROM_ABI void
__sort5(_ForwardIterator __x1,
_ForwardIterator __x2,
_ForwardIterator __x3,
_ForwardIterator __x4,
_ForwardIterator __x5,
_Comp __comp) {
template <class _AlgPolicy,
class _Compare,
class _RandomAccessIterator,
__enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0>
inline _LIBCPP_HIDE_FROM_ABI void
__sort5(_RandomAccessIterator __x1,
_RandomAccessIterator __x2,
_RandomAccessIterator __x3,
_RandomAccessIterator __x4,
_RandomAccessIterator __x5,
_Compare __c) {
std::__cond_swap<_Compare>(__x1, __x2, __c);
std::__cond_swap<_Compare>(__x4, __x5, __c);
std::__partially_sorted_swap<_Compare>(__x3, __x4, __x5, __c);
std::__cond_swap<_Compare>(__x2, __x5, __c);
std::__partially_sorted_swap<_Compare>(__x1, __x3, __x4, __c);
std::__partially_sorted_swap<_Compare>(__x2, __x3, __x4, __c);
}
template <class _AlgPolicy,
class _Compare,
class _RandomAccessIterator,
__enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0>
inline _LIBCPP_HIDE_FROM_ABI void
__sort5(_RandomAccessIterator __x1,
_RandomAccessIterator __x2,
_RandomAccessIterator __x3,
_RandomAccessIterator __x4,
_RandomAccessIterator __x5,
_Compare __comp) {
using _Ops = _IterOps<_AlgPolicy>;
std::__sort4<_AlgPolicy, _Comp>(__x1, __x2, __x3, __x4, __comp);
std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __comp);
if (__comp(*__x5, *__x4)) {
_Ops::iter_swap(__x4, __x5);
if (__comp(*__x4, *__x3)) {
@ -130,144 +225,6 @@ __sort5(_ForwardIterator __x1,
}
}
// The comparator being simple is a prerequisite for using the branchless optimization.
template <class _Tp>
struct __is_simple_comparator : false_type {};
template <>
struct __is_simple_comparator<__less<>&> : true_type {};
template <class _Tp>
struct __is_simple_comparator<less<_Tp>&> : true_type {};
template <class _Tp>
struct __is_simple_comparator<greater<_Tp>&> : true_type {};
#if _LIBCPP_STD_VER >= 20
template <>
struct __is_simple_comparator<ranges::less&> : true_type {};
template <>
struct __is_simple_comparator<ranges::greater&> : true_type {};
#endif
template <class _Compare, class _Iter, class _Tp = typename iterator_traits<_Iter>::value_type>
using __use_branchless_sort =
integral_constant<bool,
__libcpp_is_contiguous_iterator<_Iter>::value && sizeof(_Tp) <= sizeof(void*) &&
is_arithmetic<_Tp>::value && __is_simple_comparator<_Compare>::value>;
namespace __detail {
// Size in bits for the bitset in use.
enum { __block_size = sizeof(uint64_t) * 8 };
} // namespace __detail
// Ensures that __c(*__x, *__y) is true by swapping *__x and *__y if necessary.
template <class _Compare, class _RandomAccessIterator>
inline _LIBCPP_HIDE_FROM_ABI void __cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) {
// Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`).
using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
bool __r = __c(*__x, *__y);
value_type __tmp = __r ? *__x : *__y;
*__y = __r ? *__y : *__x;
*__x = __tmp;
}
// Ensures that *__x, *__y and *__z are ordered according to the comparator __c,
// under the assumption that *__y and *__z are already ordered.
template <class _Compare, class _RandomAccessIterator>
inline _LIBCPP_HIDE_FROM_ABI void
__partially_sorted_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _RandomAccessIterator __z, _Compare __c) {
// Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`).
using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
bool __r = __c(*__z, *__x);
value_type __tmp = __r ? *__z : *__x;
*__z = __r ? *__x : *__z;
__r = __c(__tmp, *__y);
*__x = __r ? *__x : *__y;
*__y = __r ? *__y : __tmp;
}
template <class,
class _Compare,
class _RandomAccessIterator,
__enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, int> = 0>
inline _LIBCPP_HIDE_FROM_ABI void __sort3_maybe_branchless(
_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) {
std::__cond_swap<_Compare>(__x2, __x3, __c);
std::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c);
}
template <class _AlgPolicy,
class _Compare,
class _RandomAccessIterator,
__enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, int> = 0>
inline _LIBCPP_HIDE_FROM_ABI void __sort3_maybe_branchless(
_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) {
std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c);
}
template <class,
class _Compare,
class _RandomAccessIterator,
__enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, int> = 0>
inline _LIBCPP_HIDE_FROM_ABI void __sort4_maybe_branchless(
_RandomAccessIterator __x1,
_RandomAccessIterator __x2,
_RandomAccessIterator __x3,
_RandomAccessIterator __x4,
_Compare __c) {
std::__cond_swap<_Compare>(__x1, __x3, __c);
std::__cond_swap<_Compare>(__x2, __x4, __c);
std::__cond_swap<_Compare>(__x1, __x2, __c);
std::__cond_swap<_Compare>(__x3, __x4, __c);
std::__cond_swap<_Compare>(__x2, __x3, __c);
}
template <class _AlgPolicy,
class _Compare,
class _RandomAccessIterator,
__enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, int> = 0>
inline _LIBCPP_HIDE_FROM_ABI void __sort4_maybe_branchless(
_RandomAccessIterator __x1,
_RandomAccessIterator __x2,
_RandomAccessIterator __x3,
_RandomAccessIterator __x4,
_Compare __c) {
std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __c);
}
template <class _AlgPolicy,
class _Compare,
class _RandomAccessIterator,
__enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, int> = 0>
inline _LIBCPP_HIDE_FROM_ABI void __sort5_maybe_branchless(
_RandomAccessIterator __x1,
_RandomAccessIterator __x2,
_RandomAccessIterator __x3,
_RandomAccessIterator __x4,
_RandomAccessIterator __x5,
_Compare __c) {
std::__cond_swap<_Compare>(__x1, __x2, __c);
std::__cond_swap<_Compare>(__x4, __x5, __c);
std::__partially_sorted_swap<_Compare>(__x3, __x4, __x5, __c);
std::__cond_swap<_Compare>(__x2, __x5, __c);
std::__partially_sorted_swap<_Compare>(__x1, __x3, __x4, __c);
std::__partially_sorted_swap<_Compare>(__x2, __x3, __x4, __c);
}
template <class _AlgPolicy,
class _Compare,
class _RandomAccessIterator,
__enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, int> = 0>
inline _LIBCPP_HIDE_FROM_ABI void __sort5_maybe_branchless(
_RandomAccessIterator __x1,
_RandomAccessIterator __x2,
_RandomAccessIterator __x3,
_RandomAccessIterator __x4,
_RandomAccessIterator __x5,
_Compare __c) {
std::__sort5<_AlgPolicy, _Compare, _RandomAccessIterator>(
std::move(__x1), std::move(__x2), std::move(__x3), std::move(__x4), std::move(__x5), __c);
}
// Assumes size > 0
template <class _AlgPolicy, class _Compare, class _BidirectionalIterator>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
@ -355,14 +312,14 @@ __insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIterator
_Ops::iter_swap(__first, __last);
return true;
case 3:
std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), --__last, __comp);
std::__sort3<_AlgPolicy, _Comp>(__first, __first + difference_type(1), --__last, __comp);
return true;
case 4:
std::__sort4_maybe_branchless<_AlgPolicy, _Comp>(
std::__sort4<_AlgPolicy, _Comp>(
__first, __first + difference_type(1), __first + difference_type(2), --__last, __comp);
return true;
case 5:
std::__sort5_maybe_branchless<_AlgPolicy, _Comp>(
std::__sort5<_AlgPolicy, _Comp>(
__first,
__first + difference_type(1),
__first + difference_type(2),
@ -373,7 +330,7 @@ __insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIterator
}
typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
_RandomAccessIterator __j = __first + difference_type(2);
std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), __j, __comp);
std::__sort3<_AlgPolicy, _Comp>(__first, __first + difference_type(1), __j, __comp);
const unsigned __limit = 8;
unsigned __count = 0;
for (_RandomAccessIterator __i = __j + difference_type(1); __i != __last; ++__i) {
@ -780,14 +737,14 @@ void __introsort(_RandomAccessIterator __first,
_Ops::iter_swap(__first, __last);
return;
case 3:
std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp);
std::__sort3<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp);
return;
case 4:
std::__sort4_maybe_branchless<_AlgPolicy, _Compare>(
std::__sort4<_AlgPolicy, _Compare>(
__first, __first + difference_type(1), __first + difference_type(2), --__last, __comp);
return;
case 5:
std::__sort5_maybe_branchless<_AlgPolicy, _Compare>(
std::__sort5<_AlgPolicy, _Compare>(
__first,
__first + difference_type(1),
__first + difference_type(2),
@ -928,10 +885,8 @@ __sort_dispatch(_RandomAccessIterator __first, _RandomAccessIterator __last, _Co
// Only use bitset partitioning for arithmetic types. We should also check
// that the default comparator is in use so that we are sure that there are no
// branches in the comparator.
std::__introsort<_AlgPolicy,
_Comp&,
_RandomAccessIterator,
__use_branchless_sort<_Comp, _RandomAccessIterator>::value>(__first, __last, __comp, __depth_limit);
std::__introsort<_AlgPolicy, _Comp&, _RandomAccessIterator, __use_branchless_sort<_Comp, _RandomAccessIterator> >(
__first, __last, __comp, __depth_limit);
}
template <class _Type, class... _Options>

View File

@ -362,6 +362,9 @@ struct _LIBCPP_TEMPLATE_VIS less : __binary_function<_Tp, _Tp, bool> {
};
_LIBCPP_CTAD_SUPPORTED_FOR_TYPE(less);
template <class _Tp>
inline const bool __desugars_to_v<__less_tag, less<_Tp>, _Tp, _Tp> = true;
template <class _Tp>
inline const bool __desugars_to_v<__totally_ordered_less_tag, less<_Tp>, _Tp, _Tp> = is_integral<_Tp>::value;
@ -377,6 +380,9 @@ struct _LIBCPP_TEMPLATE_VIS less<void> {
typedef void is_transparent;
};
template <class _Tp, class _Up>
inline const bool __desugars_to_v<__less_tag, less<>, _Tp, _Up> = true;
template <class _Tp>
inline const bool __desugars_to_v<__totally_ordered_less_tag, less<>, _Tp, _Tp> = is_integral<_Tp>::value;
#endif
@ -446,6 +452,9 @@ struct _LIBCPP_TEMPLATE_VIS greater : __binary_function<_Tp, _Tp, bool> {
};
_LIBCPP_CTAD_SUPPORTED_FOR_TYPE(greater);
template <class _Tp>
inline const bool __desugars_to_v<__greater_tag, greater<_Tp>, _Tp, _Tp> = true;
#if _LIBCPP_STD_VER >= 14
template <>
struct _LIBCPP_TEMPLATE_VIS greater<void> {
@ -457,6 +466,9 @@ struct _LIBCPP_TEMPLATE_VIS greater<void> {
}
typedef void is_transparent;
};
template <class _Tp, class _Up>
inline const bool __desugars_to_v<__greater_tag, greater<>, _Tp, _Up> = true;
#endif
// Logical operations

View File

@ -102,6 +102,12 @@ inline const bool __desugars_to_v<__equal_tag, ranges::equal_to, _Tp, _Up> = tru
template <class _Tp, class _Up>
inline const bool __desugars_to_v<__totally_ordered_less_tag, ranges::less, _Tp, _Up> = true;
template <class _Tp, class _Up>
inline const bool __desugars_to_v<__less_tag, ranges::less, _Tp, _Up> = true;
template <class _Tp, class _Up>
inline const bool __desugars_to_v<__greater_tag, ranges::greater, _Tp, _Up> = true;
#endif // _LIBCPP_STD_VER >= 20
_LIBCPP_END_NAMESPACE_STD

View File

@ -25,6 +25,12 @@ struct __equal_tag {};
// syntactically, the operation is equivalent to calling `a + b`
struct __plus_tag {};
// syntactically, the operation is equivalent to calling `a < b`
struct __less_tag {};
// syntactically, the operation is equivalent to calling `a > b`
struct __greater_tag {};
// syntactically, the operation is equivalent to calling `a < b`, and these expressions
// have to be true for any `a` and `b`:
// - `(a < b) == (b > a)`

View File

@ -27,10 +27,8 @@ template <class _Tp>
inline constexpr bool is_trivially_copyable_v = __is_trivially_copyable(_Tp);
#endif
#if _LIBCPP_STD_VER >= 20
template <class _Tp>
inline constexpr bool __is_cheap_to_copy = is_trivially_copyable_v<_Tp> && sizeof(_Tp) <= sizeof(std::intmax_t);
#endif
inline const bool __is_cheap_to_copy = __is_trivially_copyable(_Tp) && sizeof(_Tp) <= sizeof(std::intmax_t);
_LIBCPP_END_NAMESPACE_STD

View File

@ -21,8 +21,7 @@ void __sort(RandomAccessIterator first, RandomAccessIterator last, Comp comp) {
std::__introsort<_ClassicAlgPolicy,
ranges::less,
RandomAccessIterator,
__use_branchless_sort<ranges::less, RandomAccessIterator>::value>(
first, last, ranges::less{}, depth_limit);
__use_branchless_sort<ranges::less, RandomAccessIterator>>(first, last, ranges::less{}, depth_limit);
}
// clang-format off