llvm-project/libcxx/include/__format/formatter_integral.h
Mark de Wever f7c0df002a [libc++][format] Improve format buffer.
Allow bulk output operations on the buffer instead of adding one
code unit at a time. This has a huge performance benefit at the cost of
larger binary. This doesn't implement @vitaut's earlier suggestion to
avoid buffering for std::string when writing a strings. That can be done
in a follow-up patch.

There are some minor complications for the non-buffered format_to_n.
When writing one character at a time it's easy to detect when reaching
the limit n. This is solved by adding a small overhead for format_to_n.
When the next write would overflow it stores the data in the internal
buffer and copies that up-to n code units. The overhead isn't measured,
but it's expected to only be an issue for small values of n; for larger
values the general improvements will outweight the new overhead.

```
   text	   data	    bss	    dec	    hex	filename
 349081	   6096	    440	 355617	  56d21	format.libcxx.out-baseline
 344442	   6088	    440	 350970	  55afa	formatted_size.libcxx.out-baseline
4567980	  57272	    424	4625676	 46950c	formatter_float.libcxx.out-baseline
 718800	  12472	    488	 731760	  b2a70	formatter_int.libcxx.out-baseline
 376341	   6096	    552	 382989	  5d80d	format_to.libcxx.out-beaseline

 370169	   6096	    440	 376705	  5bf81	format.libcxx.out
 365530	   6088	    440	 372058	  5ad5a	formatted_size.libcxx.out
4575116	  57272	    424	4632812	 46b0ec	formatter_float.libcxx.out
 725936	  12472	    488	 738896	  b4650	formatter_int.libcxx.out
 397429	   6096	    552	 404077	  62a6d	format_to.libcxx.out
```

For very small strings the new method is slower, from 4 characters
there's already a small gain.

```
Comparing ./format.libcxx.out-baseline to ./format.libcxx.out
Benchmark                                           Time             CPU      Time Old      Time New       CPU Old       CPU New
--------------------------------------------------------------------------------------------------------------------------------
BM_format_string<char>/1                         +0.0268         +0.0268            43            44            43            44
BM_format_string<char>/2                         +0.0133         +0.0133            22            22            22            22
BM_format_string<char>/4                         -0.0248         -0.0248            12            11            12            11
BM_format_string<char>/8                         -0.0831         -0.0831             6             6             6             6
BM_format_string<char>/16                        -0.2976         -0.2976             4             3             4             3
BM_format_string<char>/32                        -0.4369         -0.4369             3             2             3             2
BM_format_string<char>/64                        -0.6375         -0.6375             3             1             3             1
BM_format_string<char>/128                       -0.7685         -0.7685             2             1             2             1

```

The int benchmark has benefits for the simple formatting, but shines for
the complex formatting:
```
Comparing ./formatter_int.libcxx.out-baseline to ./formatter_int.libcxx.out
Benchmark                                                               Time             CPU      Time Old      Time New       CPU Old       CPU New
----------------------------------------------------------------------------------------------------------------------------------------------------
BM_Basic<uint32_t>                                                   -0.2307         -0.2307            60            46            60            46
BM_Basic<int32_t>                                                    -0.1985         -0.1985            61            49            61            49
BM_Basic<uint64_t>                                                   -0.3478         -0.3479            81            53            81            53
BM_Basic<int64_t>                                                    -0.3475         -0.3475            81            53            81            53
BM_BasicLow<__uint128_t>                                             -0.3388         -0.3388            86            57            86            57
BM_BasicLow<__int128_t>                                              -0.3431         -0.3431            86            57            86            57
BM_Basic<__uint128_t>                                                -0.2822         -0.2822           236           170           236           170
BM_Basic<__int128_t>                                                 -0.3107         -0.3107           219           151           219           151
Integral_LocFalse_BaseBin_AlignNone_Int64                            -0.5781         -0.5781           178            75           178            75
Integral_LocFalse_BaseBin_AlignmentLeft_Int64                        -0.9231         -0.9231          1156            89          1156            89
Integral_LocFalse_BaseBin_AlignmentCenter_Int64                      -0.9179         -0.9179          1107            91          1107            91
Integral_LocFalse_BaseBin_AlignmentRight_Int64                       -0.9238         -0.9238          1147            87          1147            87
Integral_LocFalse_BaseBin_ZeroPadding_Int64                          -0.9170         -0.9170          1137            94          1137            94
Integral_LocFalse_BaseBin_AlignNone_Uint64                           -0.5923         -0.5923           175            71           175            71
Integral_LocFalse_BaseBin_AlignmentLeft_Uint64                       -0.9251         -0.9251          1154            86          1154            86
Integral_LocFalse_BaseBin_AlignmentCenter_Uint64                     -0.9204         -0.9204          1105            88          1105            88
Integral_LocFalse_BaseBin_AlignmentRight_Uint64                      -0.9242         -0.9242          1125            85          1125            85
Integral_LocFalse_BaseBin_ZeroPadding_Uint64                         -0.9232         -0.9232          1139            88          1139            88
Integral_LocFalse_BaseOct_AlignNone_Int64                            -0.3241         -0.3241           100            67           100            67
Integral_LocFalse_BaseOct_AlignmentLeft_Int64                        -0.9322         -0.9322          1166            79          1166            79
Integral_LocFalse_BaseOct_AlignmentCenter_Int64                      -0.9251         -0.9251          1108            83          1108            83
Integral_LocFalse_BaseOct_AlignmentRight_Int64                       -0.9303         -0.9303          1136            79          1136            79
Integral_LocFalse_BaseOct_ZeroPadding_Int64                          -0.9264         -0.9264          1156            85          1156            85
Integral_LocFalse_BaseOct_AlignNone_Uint64                           -0.3116         -0.3116            96            66            96            66
Integral_LocFalse_BaseOct_AlignmentLeft_Uint64                       -0.9310         -0.9310          1168            81          1168            81
Integral_LocFalse_BaseOct_AlignmentCenter_Uint64                     -0.9281         -0.9281          1128            81          1128            81
Integral_LocFalse_BaseOct_AlignmentRight_Uint64                      -0.9299         -0.9299          1148            80          1148            80
Integral_LocFalse_BaseOct_ZeroPadding_Uint64                         -0.9288         -0.9288          1153            82          1153            82
Integral_LocFalse_BaseDec_AlignNone_Int64                            -0.3342         -0.3342            95            63            95            63
Integral_LocFalse_BaseDec_AlignmentLeft_Int64                        -0.9360         -0.9360          1157            74          1157            74
Integral_LocFalse_BaseDec_AlignmentCenter_Int64                      -0.9303         -0.9303          1128            79          1128            79
Integral_LocFalse_BaseDec_AlignmentRight_Int64                       -0.9369         -0.9369          1164            73          1164            73
Integral_LocFalse_BaseDec_ZeroPadding_Int64                          -0.9323         -0.9323          1157            78          1157            78
Integral_LocFalse_BaseDec_AlignNone_Uint64                           -0.3198         -0.3198            93            63            93            63
Integral_LocFalse_BaseDec_AlignmentLeft_Uint64                       -0.9351         -0.9351          1158            75          1158            75
Integral_LocFalse_BaseDec_AlignmentCenter_Uint64                     -0.9298         -0.9298          1128            79          1128            79
Integral_LocFalse_BaseDec_AlignmentRight_Uint64                      -0.9361         -0.9361          1157            74          1157            74
Integral_LocFalse_BaseDec_ZeroPadding_Uint64                         -0.9333         -0.9333          1151            77          1151            77
Integral_LocFalse_BaseHex_AlignNone_Int64                            -0.3020         -0.3020            89            62            89            62
Integral_LocFalse_BaseHex_AlignmentLeft_Int64                        -0.9357         -0.9357          1174            75          1174            75
Integral_LocFalse_BaseHex_AlignmentCenter_Int64                      -0.9319         -0.9319          1129            77          1129            77
Integral_LocFalse_BaseHex_AlignmentRight_Int64                       -0.9350         -0.9350          1161            75          1161            75
Integral_LocFalse_BaseHex_ZeroPadding_Int64                          -0.9293         -0.9293          1150            81          1150            81
Integral_LocFalse_BaseHex_AlignNone_Uint64                           -0.3056         -0.3057            86            59            86            59
Integral_LocFalse_BaseHex_AlignmentLeft_Uint64                       -0.9378         -0.9378          1174            73          1174            73
Integral_LocFalse_BaseHex_AlignmentCenter_Uint64                     -0.9341         -0.9341          1129            74          1130            74
Integral_LocFalse_BaseHex_AlignmentRight_Uint64                      -0.9361         -0.9361          1157            74          1157            74
Integral_LocFalse_BaseHex_ZeroPadding_Uint64                         -0.9315         -0.9315          1147            79          1147            79
Integral_LocFalse_BaseHexUpper_AlignNone_Int64                       -0.0019         -0.0019            91            90            91            90
Integral_LocFalse_BaseHexUpper_AlignmentLeft_Int64                   -0.9099         -0.9099          1162           105          1162           105
Integral_LocFalse_BaseHexUpper_AlignmentCenter_Int64                 -0.9041         -0.9041          1121           108          1121           108
Integral_LocFalse_BaseHexUpper_AlignmentRight_Int64                  -0.9086         -0.9086          1162           106          1162           106
Integral_LocFalse_BaseHexUpper_ZeroPadding_Int64                     -0.9057         -0.9057          1164           110          1164           110
Integral_LocFalse_BaseHexUpper_AlignNone_Uint64                      +0.0110         +0.0110            86            87            86            87
Integral_LocFalse_BaseHexUpper_AlignmentLeft_Uint64                  -0.9136         -0.9136          1161           100          1161           100
Integral_LocFalse_BaseHexUpper_AlignmentCenter_Uint64                -0.9078         -0.9078          1133           104          1133           104
Integral_LocFalse_BaseHexUpper_AlignmentRight_Uint64                 -0.9132         -0.9132          1177           102          1177           102
Integral_LocFalse_BaseHexUpper_ZeroPadding_Uint64                    -0.9091         -0.9091          1160           105          1160           105
```
Other benchmarks give similar results.

Reviewed By: #libc, ldionne

Differential Revision: https://reviews.llvm.org/D129964
2022-08-16 18:54:10 +02:00

363 lines
13 KiB
C++

// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___FORMAT_FORMATTER_INTEGRAL_H
#define _LIBCPP___FORMAT_FORMATTER_INTEGRAL_H
#include <__concepts/arithmetic.h>
#include <__concepts/same_as.h>
#include <__config>
#include <__format/format_error.h>
#include <__format/formatter.h> // for __char_type TODO FMT Move the concept?
#include <__format/formatter_output.h>
#include <__format/parser_std_format_spec.h>
#include <__utility/unreachable.h>
#include <charconv>
#include <limits>
#include <string>
#ifndef _LIBCPP_HAS_NO_LOCALIZATION
# include <locale>
#endif
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
_LIBCPP_PUSH_MACROS
#include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 17
namespace __formatter {
//
// Generic
//
_LIBCPP_HIDE_FROM_ABI inline char* __insert_sign(char* __buf, bool __negative, __format_spec::__sign __sign) {
if (__negative)
*__buf++ = '-';
else
switch (__sign) {
case __format_spec::__sign::__default:
case __format_spec::__sign::__minus:
// No sign added.
break;
case __format_spec::__sign::__plus:
*__buf++ = '+';
break;
case __format_spec::__sign::__space:
*__buf++ = ' ';
break;
}
return __buf;
}
/**
* Determines the required grouping based on the size of the input.
*
* The grouping's last element will be repeated. For simplicity this repeating
* is unwrapped based on the length of the input. (When the input is short some
* groups are not processed.)
*
* @returns The size of the groups to write. This means the number of
* separator characters written is size() - 1.
*
* @note Since zero-sized groups cause issues they are silently ignored.
*
* @note The grouping field of the locale is always a @c std::string,
* regardless whether the @c std::numpunct's type is @c char or @c wchar_t.
*/
_LIBCPP_HIDE_FROM_ABI inline string __determine_grouping(ptrdiff_t __size, const string& __grouping) {
_LIBCPP_ASSERT(!__grouping.empty() && __size > __grouping[0],
"The slow grouping formatting is used while there will be no "
"separators written");
string __r;
auto __end = __grouping.end() - 1;
auto __ptr = __grouping.begin();
while (true) {
__size -= *__ptr;
if (__size > 0)
__r.push_back(*__ptr);
else {
// __size <= 0 so the value pushed will be <= *__ptr.
__r.push_back(*__ptr + __size);
return __r;
}
// Proceed to the next group.
if (__ptr != __end) {
do {
++__ptr;
// Skip grouping with a width of 0.
} while (*__ptr == 0 && __ptr != __end);
}
}
__libcpp_unreachable();
}
//
// Char
//
template <__formatter::__char_type _CharT>
_LIBCPP_HIDE_FROM_ABI auto __format_char(
integral auto __value,
output_iterator<const _CharT&> auto __out_it,
__format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
using _Tp = decltype(__value);
if constexpr (!same_as<_CharT, _Tp>) {
// cmp_less and cmp_greater can't be used for character types.
if constexpr (signed_integral<_CharT> == signed_integral<_Tp>) {
if (__value < numeric_limits<_CharT>::min() || __value > numeric_limits<_CharT>::max())
std::__throw_format_error("Integral value outside the range of the char type");
} else if constexpr (signed_integral<_CharT>) {
// _CharT is signed _Tp is unsigned
if (__value > static_cast<make_unsigned_t<_CharT>>(numeric_limits<_CharT>::max()))
std::__throw_format_error("Integral value outside the range of the char type");
} else {
// _CharT is unsigned _Tp is signed
if (__value < 0 || static_cast<make_unsigned_t<_Tp>>(__value) > numeric_limits<_CharT>::max())
std::__throw_format_error("Integral value outside the range of the char type");
}
}
const auto __c = static_cast<_CharT>(__value);
return __formatter::__write(_VSTD::addressof(__c), _VSTD::addressof(__c) + 1, _VSTD::move(__out_it), __specs);
}
//
// Integer
//
/** Wrapper around @ref to_chars, returning the output pointer. */
template <integral _Tp>
_LIBCPP_HIDE_FROM_ABI char* __to_buffer(char* __first, char* __last, _Tp __value, int __base) {
// TODO FMT Evaluate code overhead due to not calling the internal function
// directly. (Should be zero overhead.)
to_chars_result __r = _VSTD::to_chars(__first, __last, __value, __base);
_LIBCPP_ASSERT(__r.ec == errc(0), "Internal buffer too small");
return __r.ptr;
}
/**
* Helper to determine the buffer size to output a integer in Base @em x.
*
* There are several overloads for the supported bases. The function uses the
* base as template argument so it can be used in a constant expression.
*/
template <unsigned_integral _Tp, size_t _Base>
consteval size_t __buffer_size() noexcept
requires(_Base == 2)
{
return numeric_limits<_Tp>::digits // The number of binary digits.
+ 2 // Reserve space for the '0[Bb]' prefix.
+ 1; // Reserve space for the sign.
}
template <unsigned_integral _Tp, size_t _Base>
consteval size_t __buffer_size() noexcept
requires(_Base == 8)
{
return numeric_limits<_Tp>::digits // The number of binary digits.
/ 3 // Adjust to octal.
+ 1 // Turn floor to ceil.
+ 1 // Reserve space for the '0' prefix.
+ 1; // Reserve space for the sign.
}
template <unsigned_integral _Tp, size_t _Base>
consteval size_t __buffer_size() noexcept
requires(_Base == 10)
{
return numeric_limits<_Tp>::digits10 // The floored value.
+ 1 // Turn floor to ceil.
+ 1; // Reserve space for the sign.
}
template <unsigned_integral _Tp, size_t _Base>
consteval size_t __buffer_size() noexcept
requires(_Base == 16)
{
return numeric_limits<_Tp>::digits // The number of binary digits.
/ 4 // Adjust to hexadecimal.
+ 2 // Reserve space for the '0[Xx]' prefix.
+ 1; // Reserve space for the sign.
}
template <unsigned_integral _Tp, class _CharT>
_LIBCPP_HIDE_FROM_ABI auto __format_integer(
_Tp __value,
auto& __ctx,
__format_spec::__parsed_specifications<_CharT> __specs,
bool __negative,
char* __begin,
char* __end,
const char* __prefix,
int __base) -> decltype(__ctx.out()) {
char* __first = __formatter::__insert_sign(__begin, __negative, __specs.__std_.__sign_);
if (__specs.__std_.__alternate_form_ && __prefix)
while (*__prefix)
*__first++ = *__prefix++;
char* __last = __formatter::__to_buffer(__first, __end, __value, __base);
# ifndef _LIBCPP_HAS_NO_LOCALIZATION
if (__specs.__std_.__locale_specific_form_) {
const auto& __np = use_facet<numpunct<_CharT>>(__ctx.locale());
string __grouping = __np.grouping();
ptrdiff_t __size = __last - __first;
// Writing the grouped form has more overhead than the normal output
// routines. If there will be no separators written the locale-specific
// form is identical to the normal routine. Test whether to grouped form
// is required.
if (!__grouping.empty() && __size > __grouping[0])
return __formatter::__write_using_decimal_separators(
__ctx.out(),
__begin,
__first,
__last,
__formatter::__determine_grouping(__size, __grouping),
__np.thousands_sep(),
__specs);
}
# endif
auto __out_it = __ctx.out();
if (__specs.__alignment_ != __format_spec::__alignment::__zero_padding)
__first = __begin;
else {
// __buf contains [sign][prefix]data
// ^ location of __first
// The zero padding is done like:
// - Write [sign][prefix]
// - Write data right aligned with '0' as fill character.
__out_it = __formatter::__copy(__begin, __first, _VSTD::move(__out_it));
__specs.__alignment_ = __format_spec::__alignment::__right;
__specs.__fill_ = _CharT('0');
int32_t __size = __first - __begin;
__specs.__width_ -= _VSTD::min(__size, __specs.__width_);
}
if (__specs.__std_.__type_ != __format_spec::__type::__hexadecimal_upper_case) [[likely]]
return __formatter::__write(__first, __last, __ctx.out(), __specs);
return __formatter::__write_transformed(__first, __last, __ctx.out(), __specs, __formatter::__hex_to_upper);
}
template <unsigned_integral _Tp, class _CharT>
_LIBCPP_HIDE_FROM_ABI auto __format_integer(
_Tp __value, auto& __ctx, __format_spec::__parsed_specifications<_CharT> __specs, bool __negative = false)
-> decltype(__ctx.out()) {
switch (__specs.__std_.__type_) {
case __format_spec::__type::__binary_lower_case: {
array<char, __formatter::__buffer_size<decltype(__value), 2>()> __array;
return __formatter::__format_integer(__value, __ctx, __specs, __negative, __array.begin(), __array.end(), "0b", 2);
}
case __format_spec::__type::__binary_upper_case: {
array<char, __formatter::__buffer_size<decltype(__value), 2>()> __array;
return __formatter::__format_integer(__value, __ctx, __specs, __negative, __array.begin(), __array.end(), "0B", 2);
}
case __format_spec::__type::__octal: {
// Octal is special; if __value == 0 there's no prefix.
array<char, __formatter::__buffer_size<decltype(__value), 8>()> __array;
return __formatter::__format_integer(
__value, __ctx, __specs, __negative, __array.begin(), __array.end(), __value != 0 ? "0" : nullptr, 8);
}
case __format_spec::__type::__default:
case __format_spec::__type::__decimal: {
array<char, __formatter::__buffer_size<decltype(__value), 10>()> __array;
return __formatter::__format_integer(
__value, __ctx, __specs, __negative, __array.begin(), __array.end(), nullptr, 10);
}
case __format_spec::__type::__hexadecimal_lower_case: {
array<char, __formatter::__buffer_size<decltype(__value), 16>()> __array;
return __formatter::__format_integer(__value, __ctx, __specs, __negative, __array.begin(), __array.end(), "0x", 16);
}
case __format_spec::__type::__hexadecimal_upper_case: {
array<char, __formatter::__buffer_size<decltype(__value), 16>()> __array;
return __formatter::__format_integer(__value, __ctx, __specs, __negative, __array.begin(), __array.end(), "0X", 16);
}
default:
_LIBCPP_ASSERT(false, "The parse function should have validated the type");
__libcpp_unreachable();
}
}
template <signed_integral _Tp, class _CharT>
_LIBCPP_HIDE_FROM_ABI auto
__format_integer(_Tp __value, auto& __ctx, __format_spec::__parsed_specifications<_CharT> __specs)
-> decltype(__ctx.out()) {
// Depending on the std-format-spec string the sign and the value
// might not be outputted together:
// - alternate form may insert a prefix string.
// - zero-padding may insert additional '0' characters.
// Therefore the value is processed as a positive unsigned value.
// The function @ref __insert_sign will a '-' when the value was negative.
auto __r = std::__to_unsigned_like(__value);
bool __negative = __value < 0;
if (__negative)
__r = __complement(__r);
return __formatter::__format_integer(__r, __ctx, __specs, __negative);
}
//
// Formatter arithmetic (bool)
//
template <class _CharT>
struct _LIBCPP_TEMPLATE_VIS __bool_strings;
template <>
struct _LIBCPP_TEMPLATE_VIS __bool_strings<char> {
static constexpr string_view __true{"true"};
static constexpr string_view __false{"false"};
};
# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
struct _LIBCPP_TEMPLATE_VIS __bool_strings<wchar_t> {
static constexpr wstring_view __true{L"true"};
static constexpr wstring_view __false{L"false"};
};
# endif
template <class _CharT>
_LIBCPP_HIDE_FROM_ABI auto
__format_bool(bool __value, auto& __ctx, __format_spec::__parsed_specifications<_CharT> __specs)
-> decltype(__ctx.out()) {
# ifndef _LIBCPP_HAS_NO_LOCALIZATION
if (__specs.__std_.__locale_specific_form_) {
const auto& __np = use_facet<numpunct<_CharT>>(__ctx.locale());
basic_string<_CharT> __str = __value ? __np.truename() : __np.falsename();
return __formatter::__write_string_no_precision(basic_string_view<_CharT>{__str}, __ctx.out(), __specs);
}
# endif
basic_string_view<_CharT> __str =
__value ? __formatter::__bool_strings<_CharT>::__true : __formatter::__bool_strings<_CharT>::__false;
return __formatter::__write(__str.begin(), __str.end(), __ctx.out(), __specs);
}
} // namespace __formatter
#endif //_LIBCPP_STD_VER > 17
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
#endif // _LIBCPP___FORMAT_FORMATTER_INTEGRAL_H